From eb7c0091e1f786bd3176475c9a191f22c41ba6f6 Mon Sep 17 00:00:00 2001 From: leonidk Date: Mon, 24 Mar 2008 11:04:42 +0000 Subject: [PATCH] [MLX4] added a driver for Mellanox Tech. ConnectX HCAs support git-svn-id: svn://openib.tc.cornell.edu/gen1@1014 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86 --- branches/ConnectX/hw/dirs | 3 +- branches/ConnectX/hw/mlx4/dirs | 3 + branches/ConnectX/hw/mlx4/inc/mx_abi.h | 180 ++ branches/ConnectX/hw/mlx4/inc/public.h | 136 + branches/ConnectX/hw/mlx4/inc/user.h | 95 + branches/ConnectX/hw/mlx4/kernel/bus/bus.mof | 27 + branches/ConnectX/hw/mlx4/kernel/bus/bus.rc | 16 + branches/ConnectX/hw/mlx4/kernel/bus/drv.c | 994 ++++++ branches/ConnectX/hw/mlx4/kernel/bus/drv.h | 221 ++ branches/ConnectX/hw/mlx4/kernel/bus/makefile | 8 + .../ConnectX/hw/mlx4/kernel/bus/makefile.inc | 10 + .../ConnectX/hw/mlx4/kernel/bus/mlx4_bus.cdf | 9 + .../ConnectX/hw/mlx4/kernel/bus/mlx4_bus.inf | 211 ++ .../hw/mlx4/kernel/bus/mlx4_bus32.cdf | 10 + branches/ConnectX/hw/mlx4/kernel/bus/pci.c | 468 +++ branches/ConnectX/hw/mlx4/kernel/bus/pdo.c | 240 ++ .../ConnectX/hw/mlx4/kernel/bus/precomp.h | 18 + branches/ConnectX/hw/mlx4/kernel/bus/sources | 54 + branches/ConnectX/hw/mlx4/kernel/bus/wmi.c | 244 ++ .../ConnectX/hw/mlx4/kernel/bus/wpptrace.h | 116 + branches/ConnectX/hw/mlx4/kernel/core/SOURCES | 52 + branches/ConnectX/hw/mlx4/kernel/core/cache.c | 426 +++ .../ConnectX/hw/mlx4/kernel/core/core.def | 64 + branches/ConnectX/hw/mlx4/kernel/core/core.h | 12 + branches/ConnectX/hw/mlx4/kernel/core/core.rc | 48 + .../ConnectX/hw/mlx4/kernel/core/device.c | 723 +++++ .../ConnectX/hw/mlx4/kernel/core/ev_log.mc | 56 + .../ConnectX/hw/mlx4/kernel/core/ev_log.rc | 2 + branches/ConnectX/hw/mlx4/kernel/core/iobuf.c | 553 ++++ branches/ConnectX/hw/mlx4/kernel/core/l2w.c | 326 ++ .../ConnectX/hw/mlx4/kernel/core/l2w_debug.c | 205 ++ .../ConnectX/hw/mlx4/kernel/core/l2w_memory.c | 118 + .../ConnectX/hw/mlx4/kernel/core/l2w_radix.c | 74 + .../ConnectX/hw/mlx4/kernel/core/l2w_umem.c | 164 + .../ConnectX/hw/mlx4/kernel/core/makefile | 7 + .../ConnectX/hw/mlx4/kernel/core/pa_cash.c | 366 +++ .../ConnectX/hw/mlx4/kernel/core/pa_cash.h | 51 + .../ConnectX/hw/mlx4/kernel/core/packer.c | 203 ++ .../ConnectX/hw/mlx4/kernel/core/ud_header.c | 281 ++ branches/ConnectX/hw/mlx4/kernel/core/verbs.c | 336 +++ branches/ConnectX/hw/mlx4/kernel/dirs | 6 + branches/ConnectX/hw/mlx4/kernel/hca/Makefile | 6 + branches/ConnectX/hw/mlx4/kernel/hca/SOURCES | 56 + branches/ConnectX/hw/mlx4/kernel/hca/av.c | 233 ++ branches/ConnectX/hw/mlx4/kernel/hca/ca.c | 422 +++ branches/ConnectX/hw/mlx4/kernel/hca/cq.c | 210 ++ branches/ConnectX/hw/mlx4/kernel/hca/data.c | 1039 +++++++ branches/ConnectX/hw/mlx4/kernel/hca/data.h | 344 +++ branches/ConnectX/hw/mlx4/kernel/hca/debug.h | 132 + branches/ConnectX/hw/mlx4/kernel/hca/direct.c | 285 ++ branches/ConnectX/hw/mlx4/kernel/hca/drv.c | 2669 +++++++++++++++++ branches/ConnectX/hw/mlx4/kernel/hca/drv.h | 370 +++ branches/ConnectX/hw/mlx4/kernel/hca/fw.c | 476 +++ branches/ConnectX/hw/mlx4/kernel/hca/hca.mof | 59 + branches/ConnectX/hw/mlx4/kernel/hca/hca.rc | 44 + .../ConnectX/hw/mlx4/kernel/hca/makefile.inc | 10 + branches/ConnectX/hw/mlx4/kernel/hca/mcast.c | 197 ++ .../ConnectX/hw/mlx4/kernel/hca/mlx4_hca.cdf | 10 + .../ConnectX/hw/mlx4/kernel/hca/mlx4_hca.inf | 217 ++ .../hw/mlx4/kernel/hca/mlx4_hca32.cdf | 10 + branches/ConnectX/hw/mlx4/kernel/hca/mr.c | 587 ++++ branches/ConnectX/hw/mlx4/kernel/hca/pd.c | 166 + .../ConnectX/hw/mlx4/kernel/hca/precomp.h | 48 + 
branches/ConnectX/hw/mlx4/kernel/hca/qp.c | 391 +++ branches/ConnectX/hw/mlx4/kernel/hca/srq.c | 185 ++ branches/ConnectX/hw/mlx4/kernel/hca/verbs.c | 673 +++++ branches/ConnectX/hw/mlx4/kernel/hca/verbs.h | 72 + branches/ConnectX/hw/mlx4/kernel/hca/vp.c | 325 ++ branches/ConnectX/hw/mlx4/kernel/hca/wmi.c | 261 ++ branches/ConnectX/hw/mlx4/kernel/ib/Kconfig | 8 + .../ConnectX/hw/mlx4/kernel/ib/Makefile.lnx | 3 + branches/ConnectX/hw/mlx4/kernel/ib/SOURCES | 45 + branches/ConnectX/hw/mlx4/kernel/ib/ah.c | 130 + branches/ConnectX/hw/mlx4/kernel/ib/cq.c | 577 ++++ .../ConnectX/hw/mlx4/kernel/ib/doorbell.c | 217 ++ branches/ConnectX/hw/mlx4/kernel/ib/ib.def | 11 + branches/ConnectX/hw/mlx4/kernel/ib/ib.rc | 47 + branches/ConnectX/hw/mlx4/kernel/ib/mad.c | 262 ++ branches/ConnectX/hw/mlx4/kernel/ib/main.c | 656 ++++ branches/ConnectX/hw/mlx4/kernel/ib/makefile | 7 + branches/ConnectX/hw/mlx4/kernel/ib/mlx4_ib.h | 322 ++ branches/ConnectX/hw/mlx4/kernel/ib/mr.c | 273 ++ branches/ConnectX/hw/mlx4/kernel/ib/qp.c | 1726 +++++++++++ branches/ConnectX/hw/mlx4/kernel/ib/srq.c | 359 +++ .../ConnectX/hw/mlx4/kernel/inc/bus_intf.h | 17 + branches/ConnectX/hw/mlx4/kernel/inc/cmd.h | 177 ++ branches/ConnectX/hw/mlx4/kernel/inc/cq.h | 145 + branches/ConnectX/hw/mlx4/kernel/inc/device.h | 360 +++ .../ConnectX/hw/mlx4/kernel/inc/doorbell.h | 83 + branches/ConnectX/hw/mlx4/kernel/inc/driver.h | 59 + .../ConnectX/hw/mlx4/kernel/inc/ib_cache.h | 116 + branches/ConnectX/hw/mlx4/kernel/inc/ib_mad.h | 657 ++++ .../ConnectX/hw/mlx4/kernel/inc/ib_pack.h | 245 ++ branches/ConnectX/hw/mlx4/kernel/inc/ib_smi.h | 131 + .../ConnectX/hw/mlx4/kernel/inc/ib_verbs.h | 1845 ++++++++++++ .../ConnectX/hw/mlx4/kernel/inc/ib_verbs_ex.h | 98 + branches/ConnectX/hw/mlx4/kernel/inc/qp.h | 293 ++ branches/ConnectX/hw/mlx4/kernel/inc/srq.h | 42 + branches/ConnectX/hw/mlx4/kernel/iobuf.h | 53 + branches/ConnectX/hw/mlx4/kernel/l2w.h | 311 ++ branches/ConnectX/hw/mlx4/kernel/l2w_atomic.h | 47 + branches/ConnectX/hw/mlx4/kernel/l2w_bit.h | 186 ++ branches/ConnectX/hw/mlx4/kernel/l2w_bitmap.h | 112 + branches/ConnectX/hw/mlx4/kernel/l2w_debug.h | 45 + branches/ConnectX/hw/mlx4/kernel/l2w_list.h | 194 ++ branches/ConnectX/hw/mlx4/kernel/l2w_memory.h | 332 ++ branches/ConnectX/hw/mlx4/kernel/l2w_pci.h | 108 + .../ConnectX/hw/mlx4/kernel/l2w_pcipool.h | 102 + branches/ConnectX/hw/mlx4/kernel/l2w_radix.h | 25 + .../ConnectX/hw/mlx4/kernel/l2w_spinlock.h | 148 + branches/ConnectX/hw/mlx4/kernel/l2w_sync.h | 164 + branches/ConnectX/hw/mlx4/kernel/l2w_time.h | 17 + branches/ConnectX/hw/mlx4/kernel/l2w_umem.h | 34 + branches/ConnectX/hw/mlx4/kernel/mlx4_debug.h | 193 ++ .../ConnectX/hw/mlx4/kernel/net/Makefile.lnx | 4 + branches/ConnectX/hw/mlx4/kernel/net/SOURCES | 53 + branches/ConnectX/hw/mlx4/kernel/net/alloc.c | 184 ++ branches/ConnectX/hw/mlx4/kernel/net/catas.c | 180 ++ branches/ConnectX/hw/mlx4/kernel/net/cmd.c | 517 ++++ branches/ConnectX/hw/mlx4/kernel/net/cq.c | 260 ++ branches/ConnectX/hw/mlx4/kernel/net/eq.c | 723 +++++ branches/ConnectX/hw/mlx4/kernel/net/fw.c | 841 ++++++ branches/ConnectX/hw/mlx4/kernel/net/fw.h | 165 + branches/ConnectX/hw/mlx4/kernel/net/icm.c | 451 +++ branches/ConnectX/hw/mlx4/kernel/net/icm.h | 132 + branches/ConnectX/hw/mlx4/kernel/net/intf.c | 170 ++ branches/ConnectX/hw/mlx4/kernel/net/main.c | 959 ++++++ branches/ConnectX/hw/mlx4/kernel/net/makefile | 7 + branches/ConnectX/hw/mlx4/kernel/net/mcg.c | 378 +++ branches/ConnectX/hw/mlx4/kernel/net/mlx4.h | 366 +++ branches/ConnectX/hw/mlx4/kernel/net/mr.c | 
635 ++++ branches/ConnectX/hw/mlx4/kernel/net/net.def | 16 + branches/ConnectX/hw/mlx4/kernel/net/net.rc | 47 + branches/ConnectX/hw/mlx4/kernel/net/pd.c | 98 + .../ConnectX/hw/mlx4/kernel/net/profile.c | 236 ++ branches/ConnectX/hw/mlx4/kernel/net/qp.c | 301 ++ branches/ConnectX/hw/mlx4/kernel/net/reset.c | 185 ++ branches/ConnectX/hw/mlx4/kernel/net/srq.c | 256 ++ branches/ConnectX/hw/mlx4/kernel/vc.h | 76 + .../mlx4/kernel_patches/core_0020_csum.patch | 51 + .../core_0025_qp_create_flags.patch | 53 + .../mlx4/kernel_patches/core_0030_lso.patch | 66 + .../kernel_patches/mlx4_0010_add_wc.patch | 312 ++ .../mlx4_0030_checksum_offload.patch | 125 + .../kernel_patches/mlx4_0045_qp_flags.patch | 76 + .../mlx4/kernel_patches/mlx4_0050_lso.patch | 249 ++ .../mlx4_0170_shrinking_wqe.patch | 509 ++++ branches/ConnectX/hw/mlx4/readme.txt | 59 + branches/ConnectX/hw/mlx4/user/Makefile | 7 + branches/ConnectX/hw/mlx4/user/SOURCES | 65 + branches/ConnectX/hw/mlx4/user/buf.c | 51 + branches/ConnectX/hw/mlx4/user/cq.c | 481 +++ branches/ConnectX/hw/mlx4/user/dbrec.c | 147 + branches/ConnectX/hw/mlx4/user/doorbell.h | 55 + branches/ConnectX/hw/mlx4/user/l2w.h | 151 + branches/ConnectX/hw/mlx4/user/mlx4.c | 321 ++ branches/ConnectX/hw/mlx4/user/mlx4.def | 6 + branches/ConnectX/hw/mlx4/user/mlx4.h | 342 +++ branches/ConnectX/hw/mlx4/user/mlx4_debug.c | 87 + branches/ConnectX/hw/mlx4/user/mlx4_debug.h | 142 + branches/ConnectX/hw/mlx4/user/qp.c | 755 +++++ branches/ConnectX/hw/mlx4/user/srq.c | 215 ++ branches/ConnectX/hw/mlx4/user/verbs.c | 1539 ++++++++++ branches/ConnectX/hw/mlx4/user/verbs.h | 446 +++ branches/ConnectX/hw/mlx4/user/wqe.h | 120 + 165 files changed, 41284 insertions(+), 1 deletion(-) create mode 100644 branches/ConnectX/hw/mlx4/dirs create mode 100644 branches/ConnectX/hw/mlx4/inc/mx_abi.h create mode 100644 branches/ConnectX/hw/mlx4/inc/public.h create mode 100644 branches/ConnectX/hw/mlx4/inc/user.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/bus/bus.mof create mode 100644 branches/ConnectX/hw/mlx4/kernel/bus/bus.rc create mode 100644 branches/ConnectX/hw/mlx4/kernel/bus/drv.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/bus/drv.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/bus/makefile create mode 100644 branches/ConnectX/hw/mlx4/kernel/bus/makefile.inc create mode 100644 branches/ConnectX/hw/mlx4/kernel/bus/mlx4_bus.cdf create mode 100644 branches/ConnectX/hw/mlx4/kernel/bus/mlx4_bus.inf create mode 100644 branches/ConnectX/hw/mlx4/kernel/bus/mlx4_bus32.cdf create mode 100644 branches/ConnectX/hw/mlx4/kernel/bus/pci.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/bus/pdo.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/bus/precomp.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/bus/sources create mode 100644 branches/ConnectX/hw/mlx4/kernel/bus/wmi.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/bus/wpptrace.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/core/SOURCES create mode 100644 branches/ConnectX/hw/mlx4/kernel/core/cache.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/core/core.def create mode 100644 branches/ConnectX/hw/mlx4/kernel/core/core.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/core/core.rc create mode 100644 branches/ConnectX/hw/mlx4/kernel/core/device.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/core/ev_log.mc create mode 100644 branches/ConnectX/hw/mlx4/kernel/core/ev_log.rc create mode 100644 branches/ConnectX/hw/mlx4/kernel/core/iobuf.c create mode 100644 
branches/ConnectX/hw/mlx4/kernel/core/l2w.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/core/l2w_debug.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/core/l2w_memory.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/core/l2w_radix.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/core/l2w_umem.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/core/makefile create mode 100644 branches/ConnectX/hw/mlx4/kernel/core/pa_cash.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/core/pa_cash.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/core/packer.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/core/ud_header.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/core/verbs.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/dirs create mode 100644 branches/ConnectX/hw/mlx4/kernel/hca/Makefile create mode 100644 branches/ConnectX/hw/mlx4/kernel/hca/SOURCES create mode 100644 branches/ConnectX/hw/mlx4/kernel/hca/av.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/hca/ca.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/hca/cq.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/hca/data.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/hca/data.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/hca/debug.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/hca/direct.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/hca/drv.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/hca/drv.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/hca/fw.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/hca/hca.mof create mode 100644 branches/ConnectX/hw/mlx4/kernel/hca/hca.rc create mode 100644 branches/ConnectX/hw/mlx4/kernel/hca/makefile.inc create mode 100644 branches/ConnectX/hw/mlx4/kernel/hca/mcast.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/hca/mlx4_hca.cdf create mode 100644 branches/ConnectX/hw/mlx4/kernel/hca/mlx4_hca.inf create mode 100644 branches/ConnectX/hw/mlx4/kernel/hca/mlx4_hca32.cdf create mode 100644 branches/ConnectX/hw/mlx4/kernel/hca/mr.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/hca/pd.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/hca/precomp.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/hca/qp.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/hca/srq.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/hca/verbs.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/hca/verbs.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/hca/vp.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/hca/wmi.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/ib/Kconfig create mode 100644 branches/ConnectX/hw/mlx4/kernel/ib/Makefile.lnx create mode 100644 branches/ConnectX/hw/mlx4/kernel/ib/SOURCES create mode 100644 branches/ConnectX/hw/mlx4/kernel/ib/ah.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/ib/cq.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/ib/doorbell.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/ib/ib.def create mode 100644 branches/ConnectX/hw/mlx4/kernel/ib/ib.rc create mode 100644 branches/ConnectX/hw/mlx4/kernel/ib/mad.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/ib/main.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/ib/makefile create mode 100644 branches/ConnectX/hw/mlx4/kernel/ib/mlx4_ib.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/ib/mr.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/ib/qp.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/ib/srq.c create mode 100644 
branches/ConnectX/hw/mlx4/kernel/inc/bus_intf.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/inc/cmd.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/inc/cq.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/inc/device.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/inc/doorbell.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/inc/driver.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/inc/ib_cache.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/inc/ib_mad.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/inc/ib_pack.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/inc/ib_smi.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/inc/ib_verbs.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/inc/ib_verbs_ex.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/inc/qp.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/inc/srq.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/iobuf.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/l2w.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/l2w_atomic.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/l2w_bit.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/l2w_bitmap.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/l2w_debug.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/l2w_list.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/l2w_memory.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/l2w_pci.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/l2w_pcipool.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/l2w_radix.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/l2w_spinlock.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/l2w_sync.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/l2w_time.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/l2w_umem.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/mlx4_debug.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/net/Makefile.lnx create mode 100644 branches/ConnectX/hw/mlx4/kernel/net/SOURCES create mode 100644 branches/ConnectX/hw/mlx4/kernel/net/alloc.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/net/catas.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/net/cmd.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/net/cq.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/net/eq.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/net/fw.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/net/fw.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/net/icm.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/net/icm.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/net/intf.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/net/main.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/net/makefile create mode 100644 branches/ConnectX/hw/mlx4/kernel/net/mcg.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/net/mlx4.h create mode 100644 branches/ConnectX/hw/mlx4/kernel/net/mr.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/net/net.def create mode 100644 branches/ConnectX/hw/mlx4/kernel/net/net.rc create mode 100644 branches/ConnectX/hw/mlx4/kernel/net/pd.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/net/profile.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/net/qp.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/net/reset.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/net/srq.c create mode 100644 branches/ConnectX/hw/mlx4/kernel/vc.h create mode 100644 branches/ConnectX/hw/mlx4/kernel_patches/core_0020_csum.patch 
create mode 100644 branches/ConnectX/hw/mlx4/kernel_patches/core_0025_qp_create_flags.patch create mode 100644 branches/ConnectX/hw/mlx4/kernel_patches/core_0030_lso.patch create mode 100644 branches/ConnectX/hw/mlx4/kernel_patches/mlx4_0010_add_wc.patch create mode 100644 branches/ConnectX/hw/mlx4/kernel_patches/mlx4_0030_checksum_offload.patch create mode 100644 branches/ConnectX/hw/mlx4/kernel_patches/mlx4_0045_qp_flags.patch create mode 100644 branches/ConnectX/hw/mlx4/kernel_patches/mlx4_0050_lso.patch create mode 100644 branches/ConnectX/hw/mlx4/kernel_patches/mlx4_0170_shrinking_wqe.patch create mode 100644 branches/ConnectX/hw/mlx4/readme.txt create mode 100644 branches/ConnectX/hw/mlx4/user/Makefile create mode 100644 branches/ConnectX/hw/mlx4/user/SOURCES create mode 100644 branches/ConnectX/hw/mlx4/user/buf.c create mode 100644 branches/ConnectX/hw/mlx4/user/cq.c create mode 100644 branches/ConnectX/hw/mlx4/user/dbrec.c create mode 100644 branches/ConnectX/hw/mlx4/user/doorbell.h create mode 100644 branches/ConnectX/hw/mlx4/user/l2w.h create mode 100644 branches/ConnectX/hw/mlx4/user/mlx4.c create mode 100644 branches/ConnectX/hw/mlx4/user/mlx4.def create mode 100644 branches/ConnectX/hw/mlx4/user/mlx4.h create mode 100644 branches/ConnectX/hw/mlx4/user/mlx4_debug.c create mode 100644 branches/ConnectX/hw/mlx4/user/mlx4_debug.h create mode 100644 branches/ConnectX/hw/mlx4/user/qp.c create mode 100644 branches/ConnectX/hw/mlx4/user/srq.c create mode 100644 branches/ConnectX/hw/mlx4/user/verbs.c create mode 100644 branches/ConnectX/hw/mlx4/user/verbs.h create mode 100644 branches/ConnectX/hw/mlx4/user/wqe.h diff --git a/branches/ConnectX/hw/dirs b/branches/ConnectX/hw/dirs index 5905f6c2..c39aa8b6 100644 --- a/branches/ConnectX/hw/dirs +++ b/branches/ConnectX/hw/dirs @@ -1,2 +1,3 @@ DIRS=\ - mthca + mthca \ + mlx4 diff --git a/branches/ConnectX/hw/mlx4/dirs b/branches/ConnectX/hw/mlx4/dirs new file mode 100644 index 00000000..5927717d --- /dev/null +++ b/branches/ConnectX/hw/mlx4/dirs @@ -0,0 +1,3 @@ +DIRS= \ + kernel \ + user \ No newline at end of file diff --git a/branches/ConnectX/hw/mlx4/inc/mx_abi.h b/branches/ConnectX/hw/mlx4/inc/mx_abi.h new file mode 100644 index 00000000..dcab4be3 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/inc/mx_abi.h @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: mx_abi.h 2002 2007-03-26 09:46:23Z sleybo $ + */ + +#ifndef MX_ABI_H +#define MX_ABI_H + +#include +#include "user.h" + +#pragma warning( disable : 4201) + +/* + * Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + * Specifically: + * - Do not use pointer types -- pass pointers in uint64_t instead. + * - Make sure that any structure larger than 4 bytes is padded to a + * multiple of 8 bytes. Otherwise the structure size will be + * different between 32-bit and 64-bit architectures. + */ + +struct ibv_get_context_resp { + + // mmap UAR + uint64_t uar_addr; + + // mmap Blue Flame + uint64_t bf_page; + int bf_buf_size; + int bf_offset; + + // mlx4_query_device result + int max_qp_wr; + int max_sge; + int max_cqe; + + // general parameters + uint32_t vend_id; + uint16_t dev_id; + uint16_t bf_reg_size; + uint16_t bf_regs_per_page; + uint16_t reserved1; + + // ibv_cmd_get_context result + uint32_t qp_tab_size; + + uint32_t reserved2; +}; + +struct ibv_alloc_pd_resp { + uint64_t pd_handle; + uint32_t pdn; + uint32_t reserved; +}; + +struct ibv_reg_mr { + uint64_t start; + uint64_t length; + uint64_t hca_va; + uint32_t access_flags; + uint32_t pdn; + uint64_t pd_handle; +}; + +struct ibv_reg_mr_resp { + uint64_t mr_handle; + uint32_t lkey; + uint32_t rkey; +}; + + +struct ibv_create_cq { + // struct ib_uverbs_create_cq + uint32_t cqe; + uint32_t reserved; + struct mlx4_ib_create_cq; +}; + +struct ibv_create_cq_resp { + // struct ib_uverbs_create_cq_resp + uint64_t cq_handle; + uint32_t cqe; + struct mlx4_ib_create_cq_resp; +}; + +struct ibv_create_srq { + // struct ib_uverbs_create_srq + uint64_t pd_handle; + uint32_t max_wr; + uint32_t max_sge; + uint32_t srq_limit; + uint32_t reserved; + struct mlx4_ib_create_srq; +}; + +struct ibv_create_srq_resp { + // struct ib_uverbs_create_srq_resp + uint64_t srq_handle; + uint32_t max_wr; + uint32_t max_sge; + struct mlx4_ib_create_srq_resp; +}; + +struct ibv_create_qp { + // struct ib_uverbs_create_qp + uint64_t pd_handle; + uint64_t send_cq_handle; + uint64_t recv_cq_handle; + uint64_t srq_handle; + uint32_t max_send_wr; + uint32_t max_recv_wr; + uint32_t max_send_sge; + uint32_t max_recv_sge; + uint32_t max_inline_data; + uint8_t sq_sig_all; + uint8_t qp_type; + uint8_t is_srq; + uint8_t reserved0; + struct mlx4_ib_create_qp; +}; + +struct ibv_create_qp_resp { + // struct ib_uverbs_create_qp_resp + uint64_t qp_handle; + uint32_t qpn; + uint32_t max_send_wr; + uint32_t max_recv_wr; + uint32_t max_send_sge; + uint32_t max_recv_sge; + uint32_t max_inline_data; +}; + +struct ibv_modify_qp_resp { + enum ibv_qp_attr_mask attr_mask; + uint8_t qp_state; + uint8_t reserved[3]; +}; + +struct ibv_create_ah_resp { + uint64_t start; +}; + +#pragma warning( default : 4201) + +#endif /* MX_ABI_H */ + diff --git a/branches/ConnectX/hw/mlx4/inc/public.h b/branches/ConnectX/hw/mlx4/inc/public.h new file mode 
100644 index 00000000..e2ff2fef --- /dev/null +++ b/branches/ConnectX/hw/mlx4/inc/public.h @@ -0,0 +1,136 @@ +/*++ +Copyright (c) 1990-2000 Microsoft Corporation All Rights Reserved + +Module Name: + + public.h + +Abstract: + + This module contains the common declarations shared by driver + and user applications. + +Environment: + + user and kernel + +--*/ + +// +// Define a WMI GUID to get MLX4_HCA info. +// (used in hca\wmi.c) +// + +// {2C4C8445-E4A6-45bc-889B-E5E93551DDAF} +DEFINE_GUID(MLX4_HCA_WMI_STD_DATA_GUID, +0x2c4c8445, 0xe4a6, 0x45bc, 0x88, 0x9b, 0xe5, 0xe9, 0x35, 0x51, 0xdd, 0xaf); + + + +// +// Define a WMI GUID to get MLX4_BUS info. +// (used in bus\wmi.c) +// + +// {3337968C-F117-4289-84C2-04EF74CBAD77} +DEFINE_GUID(MLX4_BUS_WMI_STD_DATA_GUID, +0x3337968c, 0xf117, 0x4289, 0x84, 0xc2, 0x4, 0xef, 0x74, 0xcb, 0xad, 0x77); + + + +// +// Define a GUID for MLX4_BUS upper (IB) interface. +// (used in hca\drv.c) +// + +// {48AC3404-269E-4ab0-B5F3-9EF15AA79D0C} +DEFINE_GUID(MLX4_BUS_IB_INTERFACE_GUID, +0x48ac3404, 0x269e, 0x4ab0, 0xb5, 0xf3, 0x9e, 0xf1, 0x5a, 0xa7, 0x9d, 0xc); + + + +// +// Define the MLX4_BUS type GUID. +// (used in bus\drv.c for responding to the IRP_MN_QUERY_BUS_INFORMATION) +// + +// {CF9E3C49-48D1-45b5-ABD7-CBCA7D954DF4} +DEFINE_GUID(MLX4_BUS_TYPE_GUID, +0xcf9e3c49, 0x48d1, 0x45b5, 0xab, 0xd7, 0xcb, 0xca, 0x7d, 0x95, 0x4d, 0xf4); + + + +// +// Installation Class for MLX4 BUS driver +// (used in bus\mlx4_bus.inf) +// + +// {714995B2-CD65-4a47-BCFE-95AC73A0D780} + + + +// +// Installation Class for MLX4 HCA driver +// (used in hca\mlx4_hca.inf) +// + +// {31B0B28A-26FF-4dca-A6FA-E767C7DFBA20} + + +#if 0 + +// +// Define an Interface Guid for mxe device class. +// This GUID is used to register (IoRegisterDeviceInterface) +// an instance of an interface so that user application +// can control the mxe device. +// + +DEFINE_GUID (GUID_DEVINTERFACE_MXE, + 0x781EF630, 0x72B2, 0x11d2, 0xB8, 0x52, 0x00, 0xC0, 0x4F, 0xAD, 0x51, 0x71); +//{781EF630-72B2-11d2-B852-00C04FAD5171} + +// +// Define a Setup Class GUID for Mxe Class. This is same +// as the TOASTSER CLASS guid in the INF files. +// +//leo +DEFINE_GUID (GUID_DEVCLASS_MXEETHER, + 0x4d36e972, 0xe325, 0x11ce, 0xBF, 0xC1, 0x08, 0x00, 0x2b, 0xE1, 0x03, 0x18); +//{4d36e972-e325-11ce-bfc1-08002be10318} + +// +// Define a WMI GUID to get mxe device info. +// + +DEFINE_GUID (MXE_WMI_STD_DATA_GUID, + 0xBBA21300L, 0x6DD3, 0x11d2, 0xB8, 0x44, 0x00, 0xC0, 0x4F, 0xAD, 0x51, 0x71); + +// +// Define a WMI GUID to represent device arrival notification WMIEvent class. +// + +DEFINE_GUID (MXE_NOTIFY_DEVICE_ARRIVAL_EVENT, + 0x1cdaff1, 0xc901, 0x45b4, 0xb3, 0x59, 0xb5, 0x54, 0x27, 0x25, 0xe2, 0x9c); +// {01CDAFF1-C901-45b4-B359-B5542725E29C} + + +//leo The Guid was taken from devguid.h +//DEFINE_GUID( GUID_DEVCLASS_INFINIBAND, 0x30ef7132L, 0xd858, 0x4a0c, 0xac, 0x24, 0xb9, 0x02, 0x8a, 0x5c, 0xca, 0x3f ); + + +#endif + +// +// GUID definition are required to be outside of header inclusion pragma to avoid +// error during precompiled headers. +// + +#ifndef __PUBLIC_H +#define __PUBLIC_H + +#define BUS_HARDWARE_IDS L"MLX4\\ConnectX_Hca\0" +#define BUSENUM_COMPATIBLE_IDS L"MLX4\\ConnectX_Hca\0" + +#endif + diff --git a/branches/ConnectX/hw/mlx4/inc/user.h b/branches/ConnectX/hw/mlx4/inc/user.h new file mode 100644 index 00000000..f6337409 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/inc/user.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX4_IB_USER_H +#define MLX4_IB_USER_H + +/* + * Increment this value if any changes that break userspace ABI + * compatibility are made. + */ +#define MLX4_IB_UVERBS_ABI_VERSION 3 + +/* + * Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + * In particular do not use pointer types -- pass pointers in __u64 + * instead. 
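+ *
+ * (Editorial illustration, not part of the upstream comment: a struct such as
+ *
+ *	struct bad_example {
+ *		__u64	buf_addr;
+ *		__u32	flags;
+ *	};
+ *
+ * is 16 bytes where __u64 is 8-byte aligned, but only 12 bytes on ABIs where
+ * __u64 has 4-byte alignment, so the two sides of the ABI would disagree on
+ * its size. That is why the structs below pad themselves to a multiple of
+ * 8 bytes -- e.g. the __u8 reserved[5] tail of mlx4_ib_create_qp.)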
+ */ + +struct mlx4_ib_alloc_ucontext_resp { + __u32 qp_tab_size; + __u16 bf_reg_size; + __u16 bf_regs_per_page; +}; + +struct mlx4_ib_alloc_pd_resp { + __u32 pdn; + __u32 reserved; +}; + +struct mlx4_ib_create_cq { + __u64 buf_addr; + __u64 db_addr; + __u64 arm_sn_addr; // Windows specific +}; + +struct mlx4_ib_create_cq_resp { + __u32 cqn; + __u32 reserved; +}; + +struct mlx4_ib_resize_cq { + __u64 buf_addr; +}; + +struct mlx4_ib_create_srq { + __u64 buf_addr; + __u64 db_addr; +}; + +struct mlx4_ib_create_srq_resp { + __u32 srqn; + __u32 reserved; +}; + +struct mlx4_ib_create_qp { + __u64 buf_addr; + __u64 db_addr; + __u8 log_sq_bb_count; + __u8 log_sq_stride; + __u8 sq_no_prefetch; + __u8 reserved[5]; +}; + +#endif /* MLX4_IB_USER_H */ diff --git a/branches/ConnectX/hw/mlx4/kernel/bus/bus.mof b/branches/ConnectX/hw/mlx4/kernel/bus/bus.mof new file mode 100644 index 00000000..f5b1f1a6 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/bus/bus.mof @@ -0,0 +1,27 @@ +#PRAGMA AUTORECOVER + +[Dynamic, Provider("WMIProv"), + WMI, + Description("Mlx4 Bus driver information"), + guid("{3337968C-F117-4289-84C2-04EF74CBAD77}"), + locale("MS\\0x409")] +class Mlx4BusInformation +{ + [key, read] + string InstanceName; + [read] boolean Active; + + [WmiDataId(1), + read, + Description("The DebugPrintLevel property indicates the debug output level of MLX4_BUS device.")] + uint32 DebugPrintLevel; + + [WmiDataId(2), + read, + write, + Description("The DebugPrintLevel property indicates the debug output flags of MLX4_BUS device.")] + uint32 DebugPrintFlags; + +}; + + diff --git a/branches/ConnectX/hw/mlx4/kernel/bus/bus.rc b/branches/ConnectX/hw/mlx4/kernel/bus/bus.rc new file mode 100644 index 00000000..f6a08bed --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/bus/bus.rc @@ -0,0 +1,16 @@ +#include + +#define VER_FILETYPE VFT_DRV +#define VER_FILESUBTYPE VFT2_UNKNOWN +#ifdef DBG +#define VER_FILEDESCRIPTION_STR "MLX4 Bus Driver (checked)" +#else +#define VER_FILEDESCRIPTION_STR "MLX4 Bus Driver" +#endif +#define VER_INTERNALNAME_STR "mlx4_bus.sys" +#define VER_ORIGINALFILENAME_STR "mlx4_bus.sys" +#include + +#include "core\ev_log.rc" + + diff --git a/branches/ConnectX/hw/mlx4/kernel/bus/drv.c b/branches/ConnectX/hw/mlx4/kernel/bus/drv.c new file mode 100644 index 00000000..51e2335d --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/bus/drv.c @@ -0,0 +1,994 @@ +/*++ + +Copyright (c) 2003 Microsoft Corporation All Rights Reserved + +Module Name: + + BUSENUM.C + +Abstract: + + This module contains routines to handle the function driver + aspect of the bus driver. This sample is functionally + equivalent to the WDM mxe bus driver. 
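+
+    Editorial note: in this driver the bus FDO exposes each ConnectX HCA
+    function as a child PDO with hardware ID MLX4\ConnectX_Hca (see
+    inc\public.h and pdo.c), which the mlx4_hca function driver is intended
+    to bind to.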
+ +Environment: + + kernel mode only + +--*/ + +#include "precomp.h" +#include +#include + +#if defined(EVENT_TRACING) +#include "drv.tmh" +#endif + +#ifdef ALLOC_PRAGMA +#pragma alloc_text (INIT, DriverEntry) +#pragma alloc_text (PAGE, EvtDeviceAdd) +#pragma alloc_text (PAGE, EvtDriverUnload) +#pragma alloc_text (PAGE, EvtDeviceD0Entry) +#pragma alloc_text (PAGE, EvtDeviceD0Exit) +#pragma alloc_text (PAGE, EvtPrepareHardware) +#pragma alloc_text (PAGE, EvtReleaseHardware) +#endif + +#define DRV_VERSION "1.0" +#define DRV_RELDATE "02/01/2008" + +GLOBALS g = {0}; +uint32_t g_mlx4_dbg_flags = 0xffff; +uint32_t g_mlx4_dbg_level = TRACE_LEVEL_INFORMATION; +WCHAR g_wlog_buf[ MAX_LOG_BUF_LEN ]; +UCHAR g_slog_buf[ MAX_LOG_BUF_LEN ]; + +#ifndef USE_WDM_INTERRUPTS + +typedef struct { + int int_num; + PFDO_DEVICE_DATA p_fdo; + struct mlx4_eq * eq; +} INTERRUPT_DATA, *PINTERRUPT_DATA; + +WDF_DECLARE_CONTEXT_TYPE(INTERRUPT_DATA); + +NTSTATUS +EvtEnableInterrupt( + IN WDFINTERRUPT Interrupt, + IN WDFDEVICE AssociatedDevice + ) +{ + UNUSED_PARAM(Interrupt); + UNUSED_PARAM(AssociatedDevice); + MLX4_ENTER(MLX4_DBG_DRV); + MLX4_EXIT( MLX4_DBG_DRV ); + return STATUS_SUCCESS; +} + +NTSTATUS +EvtDisableInterrupt ( + IN WDFINTERRUPT Interrupt, + IN WDFDEVICE AssociatedDevice + ) +{ + UNUSED_PARAM(Interrupt); + UNUSED_PARAM(AssociatedDevice); + MLX4_ENTER(MLX4_DBG_DRV); + MLX4_EXIT( MLX4_DBG_DRV ); + return STATUS_SUCCESS; +} + +BOOLEAN +EvtInterruptIsr( + IN WDFINTERRUPT Interrupt, + IN ULONG MessageID + ) +{ + BOOLEAN isr_handled = FALSE; + PINTERRUPT_DATA p_isr_ctx = WdfObjectGetTypedContext( Interrupt, INTERRUPT_DATA ); + + UNUSED_PARAM(MessageID); + +// MLX4_PRINT(TRACE_LEVEL_VERBOSE, MLX4_DBG_DRV, ("Fdo %p\n", p_isr_ctx->p_fdo)); + if (p_isr_ctx->eq && p_isr_ctx->eq->isr) + isr_handled = p_isr_ctx->eq->isr( p_isr_ctx->eq->eq_ix, p_isr_ctx->eq->ctx ); + + return isr_handled; +} + +#endif + +NTSTATUS +__create_child( + __in WDFDEVICE Device, + __in PWCHAR HardwareIds, + __in ULONG SerialNo + ) + +/*++ + +Routine Description: + + The user application has told us that a new device on the bus has arrived. + + We therefore need to create a new PDO, initialize it, add it to the list + of PDOs for this FDO bus, and then tell Plug and Play that all of this + happened so that it will start sending prodding IRPs. + +--*/ + +{ + NTSTATUS status = STATUS_SUCCESS; + BOOLEAN unique = TRUE; + WDFDEVICE hChild; + PPDO_DEVICE_DATA p_pdo; + PFDO_DEVICE_DATA p_fdo; + + PAGED_CODE (); + MLX4_ENTER(MLX4_DBG_DRV); + + // + // First make sure that we don't already have another device with the + // same serial number. + // Framework creates a collection of all the child devices we have + // created so far. So acquire the handle to the collection and lock + // it before walking the item. + // + p_fdo = FdoGetData(Device); + hChild = NULL; + + // + // We need an additional lock to synchronize addition because + // WdfFdoLockStaticChildListForIteration locks against anyone immediately + // updating the static child list (the changes are put on a queue until the + // list has been unlocked). This type of lock does not enforce our concept + // of unique IDs on the bus (ie SerialNo). + // + // Without our additional lock, 2 threads could execute this function, both + // find that the requested SerialNo is not in the list and attempt to add + // it. If that were to occur, 2 PDOs would have the same unique SerialNo, + // which is incorrect. 
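+	//
+	// (Editorial illustration: two threads calling
+	// __create_child(Device, BUS_HARDWARE_IDS, 0) concurrently could each scan
+	// the child list, find no PDO with SerialNo 0, and each call create_pdo(),
+	// yielding two children with the same serial number.)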
+	//
+	// We must use a passive level lock because you can only call WdfDeviceCreate
+	// at PASSIVE_LEVEL.
+	//
+	WdfWaitLockAcquire(p_fdo->ChildLock, NULL);
+	WdfFdoLockStaticChildListForIteration(Device);
+
+	while ((hChild = WdfFdoRetrieveNextStaticChild(Device,
+		hChild, WdfRetrieveAddedChildren)) != NULL) {
+		//
+		// WdfFdoRetrieveNextStaticChild returns reported and to-be-reported
+		// children (i.e. children who have been added but not yet reported to PNP).
+		//
+		// A surprise-removed child will not be returned in this list.
+		//
+		p_pdo = PdoGetData(hChild);
+		p_pdo->PdoDevice = hChild;
+		p_pdo->p_fdo = p_fdo;
+
+		//
+		// It's okay to plug in another device with the same serial number
+		// as long as the previous one is in a surprise-removed state. The
+		// previous one would be in that state after the device has been
+		// physically removed, if somebody has a handle open to it.
+		//
+		if (SerialNo == p_pdo->SerialNo) {
+			unique = FALSE;
+			status = STATUS_INVALID_PARAMETER;
+			break;
+		}
+	}
+
+	if (unique) {
+		//
+		// Create a new child device. It is OK to create and add a child while
+		// the list is locked for enumeration. The enumeration lock applies only
+		// to enumeration, not addition or removal.
+		//
+		status = create_pdo(Device, HardwareIds, SerialNo);
+	}
+
+	WdfFdoUnlockStaticChildListFromIteration(Device);
+	WdfWaitLockRelease(p_fdo->ChildLock);
+
+	MLX4_EXIT( MLX4_DBG_DRV );
+	return status;
+}
+
+
+NTSTATUS
+__do_static_enumeration(
+	IN WDFDEVICE Device
+	)
+/*++
+Routine Description:
+
+	This routine statically enumerates the child device during start,
+	instead of waiting for an external application to request enumeration.
+
+	For now a single child is created; eventually the set of children
+	will be described in the driver's registry parameters (see the
+	comment in the body below).
+
+--*/
+
+{
+	NTSTATUS status;
+
+	MLX4_ENTER(MLX4_DBG_DRV);
+
+	// eventually we'll have all information about children in the Registry;
+	// DriverEntry will read it into a global storage and
+	// this routine will create all the children based on this info
+
+	status = __create_child(Device, BUS_HARDWARE_IDS, 0 );
+
+	MLX4_EXIT( MLX4_DBG_DRV );
+	return status;
+}
+
+NTSTATUS
+EvtDeviceD0Entry(
+	IN WDFDEVICE Device,
+	IN WDF_POWER_DEVICE_STATE PreviousState
+	)
+{
+	NTSTATUS status = STATUS_SUCCESS;
+
+	UNUSED_PARAM(Device);
+	UNUSED_PARAM(PreviousState);
+
+	MLX4_ENTER(MLX4_DBG_DRV);
+
+	MLX4_PRINT(TRACE_LEVEL_INFORMATION, MLX4_DBG_DRV, ("PreviousState 0x%x\n", PreviousState));
+
+	status = __do_static_enumeration(Device);
+	if (!NT_SUCCESS(status)) {
+		MLX4_PRINT(TRACE_LEVEL_ERROR, MLX4_DBG_DRV, ("DoStaticEnumeration failed with 0x%x\n", status));
+	}
+
+	{
+		PFDO_DEVICE_DATA p_fdo = FdoGetData(Device);
+		struct pci_dev *pdev = &p_fdo->pci_dev;
+		struct mlx4_dev *mdev = pdev->dev;
+
+		MLX4_PRINT_EV(TRACE_LEVEL_INFORMATION, MLX4_DBG_DRV,
+			("Ven %x Dev %d Fw %d.%d.%d Drv %s (%s), BD %s\n",
+			(unsigned)pdev->ven_id, (unsigned)pdev->dev_id,
+			(int) (mdev->caps.fw_ver >> 32),
+			(int) (mdev->caps.fw_ver >> 16) & 0xffff,
+			(int) (mdev->caps.fw_ver & 0xffff),
+			DRV_VERSION, DRV_RELDATE,
+			mlx4_is_livefish(mdev) ?
"Y" : "N" + )); + } + + MLX4_EXIT( MLX4_DBG_DRV ); + return STATUS_SUCCESS; +} + +NTSTATUS +EvtDeviceD0Exit( + IN WDFDEVICE Device, + IN WDF_POWER_DEVICE_STATE TargetState + ) +{ + UNUSED_PARAM(Device); + UNUSED_PARAM(TargetState); + MLX4_ENTER(MLX4_DBG_DRV); + MLX4_PRINT(TRACE_LEVEL_INFORMATION, MLX4_DBG_DRV, ("TargetState 0x%x\n", TargetState)); + MLX4_EXIT( MLX4_DBG_DRV ); + return STATUS_SUCCESS; +} + + + +/* Forwards the request to the HCA's PDO. */ +static +void +__put_bus_ifc( + IN BUS_INTERFACE_STANDARD *pBusIfc ) +{ + MLX4_ENTER(MLX4_DBG_DRV); + MLX4_PRINT(TRACE_LEVEL_INFORMATION, MLX4_DBG_DRV, ("pBusIfc=0x%p\n", pBusIfc)); + pBusIfc->InterfaceDereference( pBusIfc->Context ); + MLX4_EXIT( MLX4_DBG_DRV ); +} + +static +NTSTATUS +__get_bus_ifc( + IN PFDO_DEVICE_DATA const p_fdo, + IN const GUID* const pGuid, + OUT BUS_INTERFACE_STANDARD *pBusIfc ) +{ + NTSTATUS status; + WDFDEVICE FdoDevice = p_fdo->FdoDevice; + MLX4_ENTER(MLX4_DBG_DRV); + + status = WdfFdoQueryForInterface( FdoDevice, pGuid, (PINTERFACE)pBusIfc, + sizeof(BUS_INTERFACE_STANDARD), 1, NULL ); + MLX4_EXIT( MLX4_DBG_DRV ); + return status; +} + +static +void +__put_dma_adapter( + IN PFDO_DEVICE_DATA p_fdo, + IN PDMA_ADAPTER p_dma ) +{ + UNUSED_PARAM(p_fdo); + UNUSED_PARAM(p_dma); + MLX4_ENTER(MLX4_DBG_DRV); + MLX4_EXIT( MLX4_DBG_DRV ); +} + + +// this routine releases the resources, taken in __get_resources +static +void +__put_resources( + IN PFDO_DEVICE_DATA p_fdo + ) +{ + struct pci_dev *pdev = &p_fdo->pci_dev; + MLX4_ENTER(MLX4_DBG_DRV); + + if (p_fdo->dma_adapter_taken) { + p_fdo->dma_adapter_taken = FALSE; + __put_dma_adapter( p_fdo, pdev->p_dma_adapter ); + } + + if (p_fdo->pci_bus_ifc_taken) { + p_fdo->pci_bus_ifc_taken = FALSE; + __put_bus_ifc(&pdev->bus_pci_ifc); + } + MLX4_EXIT( MLX4_DBG_DRV ); +} + +static +NTSTATUS +__get_dma_adapter( + IN PFDO_DEVICE_DATA p_fdo, + OUT PDMA_ADAPTER * pp_dma ) +{ + NTSTATUS status; + WDF_DMA_ENABLER_CONFIG dmaConfig; + + MLX4_ENTER(MLX4_DBG_DRV); + + WDF_DMA_ENABLER_CONFIG_INIT( &dmaConfig, + WdfDmaProfileScatterGather64, 0x80000000 - 1 ); + + status = WdfDmaEnablerCreate( p_fdo->FdoDevice, + &dmaConfig, WDF_NO_OBJECT_ATTRIBUTES, &p_fdo->dma_enabler ); + if (!NT_SUCCESS (status)) { + return status; + } + + *pp_dma = WdfDmaEnablerWdmGetDmaAdapter( + p_fdo->dma_enabler, WdfDmaDirectionReadFromDevice ); + + MLX4_EXIT( MLX4_DBG_DRV ); + return status; +} + +// this routine fills pci_dev structure, containing all HW +// and some other necessary common resources +static +NTSTATUS +__get_resources( + IN PFDO_DEVICE_DATA p_fdo, + IN WDFCMRESLIST ResourcesRaw, + IN WDFCMRESLIST ResourcesTranslated + ) +{ + NTSTATUS status; + ULONG i, k=0; + PCM_PARTIAL_RESOURCE_DESCRIPTOR desc; + PCM_PARTIAL_RESOURCE_DESCRIPTOR desc_raw; + BUS_INTERFACE_STANDARD bus_pci_ifc; + struct pci_dev *pdev = &p_fdo->pci_dev; + + MLX4_ENTER(MLX4_DBG_DRV); + + // + // Get PCI BUS interface + // + status = __get_bus_ifc( p_fdo, &GUID_BUS_INTERFACE_STANDARD, &bus_pci_ifc ); + if( !NT_SUCCESS( status ) ) { + MLX4_PRINT(TRACE_LEVEL_ERROR, MLX4_DBG_DRV, + ("failed: status=0x%x\n", status)); + return status; + } + RtlCopyMemory( &pdev->bus_pci_ifc, &bus_pci_ifc, sizeof(BUS_INTERFACE_STANDARD) ); + p_fdo->pci_bus_ifc_taken = TRUE; + + // + // get HW resources + // + for (i = 0; i < WdfCmResourceListGetCount(ResourcesTranslated); i++) { + + desc = WdfCmResourceListGetDescriptor( ResourcesTranslated, i ); + desc_raw = WdfCmResourceListGetDescriptor( ResourcesRaw, i ); + + switch (desc->Type) { + + case 
CmResourceTypeMemory: + MLX4_PRINT(TRACE_LEVEL_VERBOSE, MLX4_DBG_DRV, + ("EvtPrepareHardware(Raw): Desc %d: Memory: Start %#I64x, Length %#x\n", + i, desc_raw->u.Memory.Start.QuadPart, desc_raw->u.Memory.Length )); + MLX4_PRINT(TRACE_LEVEL_VERBOSE, MLX4_DBG_DRV, + ("EvtPrepareHardware: Desc %d: Memory: Start %#I64x, Length %#x\n", + i, desc->u.Memory.Start.QuadPart, desc->u.Memory.Length )); + + if (k < N_BARS) { + pdev->bar[k].phys = desc->u.Memory.Start.QuadPart; + pdev->bar[k].size = (SIZE_T)desc->u.Memory.Length; + } + k++; + break; + +#ifdef USE_WDM_INTERRUPTS + case CmResourceTypeInterrupt: + pdev->int_info = *desc; + break; +#endif + + default: + // + // Ignore all other descriptors. + // + break; + } + } + if (i ==0) { + // This means that no resources are found + MLX4_PRINT(TRACE_LEVEL_ERROR, MLX4_DBG_DRV, ("WdfCmResourceListGetCount: returned 0, quiting\n")); + return STATUS_INSUFFICIENT_RESOURCES; + } + + // + // get uplink info. + // + status = pci_save_config( &pdev->bus_pci_ifc, &pdev->pci_cfg_space); + if( !NT_SUCCESS( status ) ) + { + MLX4_PRINT(TRACE_LEVEL_ERROR, MLX4_DBG_DRV, + ("Failed to save HCA config: status=0x%x\n", status)); + goto err; + } + pci_get_uplink_info( &pdev->pci_cfg_space, &pdev->uplink_info ); + + // + // allocate DMA adapter + // + status = __get_dma_adapter( p_fdo, &pdev->p_dma_adapter ); + if( !NT_SUCCESS( status ) ) + { + MLX4_PRINT(TRACE_LEVEL_ERROR, MLX4_DBG_DRV, + ("Failed to get DMA adapter: status=0x%x\n", status)); + goto err; + } + p_fdo->dma_adapter_taken = TRUE; + + // + // fill more fields in pci_dev + // + pdev->ven_id = pdev->pci_cfg_space.VendorID; + pdev->dev_id = pdev->pci_cfg_space.DeviceID; + pdev->p_self_do = WdfDeviceWdmGetDeviceObject(p_fdo->FdoDevice); + + MLX4_EXIT( MLX4_DBG_DRV ); + return STATUS_SUCCESS; +err: + __put_resources(p_fdo); + MLX4_EXIT( MLX4_DBG_DRV ); + return status; +} + + +NTSTATUS +EvtPrepareHardware( + IN WDFDEVICE Device, + IN WDFCMRESLIST ResourcesRaw, + IN WDFCMRESLIST ResourcesTranslated + ) +{ +#ifndef USE_WDM_INTERRUPTS + int i; +#endif + int err; + NTSTATUS status; + PFDO_DEVICE_DATA p_fdo = FdoGetData(Device); + struct pci_dev *pdev = &p_fdo->pci_dev; + + MLX4_ENTER(MLX4_DBG_DRV); + + // get resources + status = __get_resources( p_fdo, ResourcesRaw, ResourcesTranslated ); + if( !NT_SUCCESS( status ) ) { + MLX4_PRINT(TRACE_LEVEL_ERROR, MLX4_DBG_DRV, ("__get_bus_ifc failed: status=0x%x\n", status)); + goto err; + } + + // enable the card + status = pci_hca_enable( &pdev->bus_pci_ifc, &pdev->pci_cfg_space ); + if( !NT_SUCCESS( status ) ) + goto err; + + // + // init the card + // + +#ifndef USE_WDM_INTERRUPTS + // enable interrupts for start up + for ( i = 0; i < MLX4_MAX_INTERRUPTS; ++i ) + WdfInterruptEnable(p_fdo->interrupt[i].WdfInterrupt); +#endif + + // NET library + err = mlx4_init_one( &p_fdo->pci_dev ); + if (err) { + status = errno_to_ntstatus(err); + goto err; + } + + // IB library + err = mlx4_ib_init(); + if (err) { + status = errno_to_ntstatus(err); + goto err; + } + +#ifndef USE_WDM_INTERRUPTS + // + // complete filling interrupt context (for more efficiency) + // + for ( i = 0; i < MLX4_MAX_INTERRUPTS; ++i ) { + struct mlx4_priv *priv = mlx4_priv( p_fdo->pci_dev.dev ); + PINTERRUPT_DATA p_isr_ctx = WdfObjectGetTypedContext( + p_fdo->interrupt[i].WdfInterrupt, INTERRUPT_DATA ); + + p_isr_ctx->eq = &priv->eq_table.eq[i]; + } +#endif + + // + // prepare MLX4 IB interface + // + + // fill the header + p_fdo->bus_ib_ifc.Size = sizeof(MLX4_BUS_IB_INTERFACE); + p_fdo->bus_ib_ifc.Version = 
MLX4_BUS_IB_INTERFACE_VERSION; + // Let the framework handle reference counting. + p_fdo->bus_ib_ifc.InterfaceReference = WdfDeviceInterfaceReferenceNoOp; + p_fdo->bus_ib_ifc.InterfaceDereference = WdfDeviceInterfaceDereferenceNoOp; + + p_fdo->bus_ib_ifc.pdev = &p_fdo->pci_dev; + p_fdo->bus_ib_ifc.p_ibdev = p_fdo->pci_dev.ib_dev; + p_fdo->bus_ib_ifc.is_livefish = mlx4_is_livefish(p_fdo->pci_dev.dev); + + status = STATUS_SUCCESS; + +err: + MLX4_EXIT( MLX4_DBG_DRV ); + return status; +} + +NTSTATUS +EvtReleaseHardware( + IN WDFDEVICE Device, + IN WDFCMRESLIST ResourcesTranslated + ) +{ + PFDO_DEVICE_DATA p_fdo = FdoGetData(Device); + + UNUSED_PARAM(ResourcesTranslated); + + MLX4_ENTER(MLX4_DBG_DRV); + + mlx4_ib_cleanup(); + mlx4_remove_one( &p_fdo->pci_dev ); + __put_resources( p_fdo ); + + MLX4_EXIT( MLX4_DBG_DRV ); + return STATUS_SUCCESS; +} + +#ifndef USE_WDM_INTERRUPTS + +static +NTSTATUS +__create_interrupt( + IN WDFDEVICE device, + IN int int_num, + IN PFN_WDF_INTERRUPT_ISR isr, + IN PFN_WDF_INTERRUPT_DPC dpc, + IN PFDO_DEVICE_DATA p_fdo, + OUT WDFINTERRUPT * p_int_obj + ) +{ + NTSTATUS Status; + + WDF_INTERRUPT_CONFIG interruptConfig; + WDF_OBJECT_ATTRIBUTES interruptAttributes; + PINTERRUPT_DATA p_isr_ctx; + + MLX4_ENTER(MLX4_DBG_DRV); + + WDF_INTERRUPT_CONFIG_INIT( &interruptConfig, isr, dpc ); + + interruptConfig.EvtInterruptEnable = EvtEnableInterrupt; + interruptConfig.EvtInterruptDisable = EvtDisableInterrupt; + + + WDF_OBJECT_ATTRIBUTES_INIT_CONTEXT_TYPE( + &interruptAttributes, INTERRUPT_DATA ); + + Status = WdfInterruptCreate( device, + &interruptConfig, &interruptAttributes, p_int_obj ); + + p_isr_ctx = WdfObjectGetTypedContext( *p_int_obj, INTERRUPT_DATA ); + p_isr_ctx->int_num = int_num; + p_isr_ctx->p_fdo = p_fdo; + p_isr_ctx->eq = NULL; + + // one can call WdfInterruptSetPolicy() to set the policy, affinity etc + + MLX4_EXIT( MLX4_DBG_DRV ); + return Status; +} + +#endif + +NTSTATUS +EvtDeviceAdd( + IN WDFDRIVER Driver, + IN PWDFDEVICE_INIT DeviceInit + ) +/*++ +Routine Description: + + EvtDeviceAdd is called by the framework in response to AddDevice + call from the PnP manager. We create and initialize a device object to + represent a new instance of mxe bus. + +Arguments: + + Driver - Handle to a framework driver object created in DriverEntry + + DeviceInit - Pointer to a framework-allocated WDFDEVICE_INIT structure. + +Return Value: + + NTSTATUS + +--*/ +{ +#ifndef USE_WDM_INTERRUPTS + int i; +#endif + WDF_OBJECT_ATTRIBUTES attributes; + NTSTATUS status; + WDFDEVICE device; + PFDO_DEVICE_DATA p_fdo; + PNP_BUS_INFORMATION busInfo; + WDF_PNPPOWER_EVENT_CALLBACKS Callbacks; + + UNREFERENCED_PARAMETER(Driver); + + PAGED_CODE (); + MLX4_ENTER(MLX4_DBG_DRV); + + // + // register PnP & Power stuff + // + WDF_PNPPOWER_EVENT_CALLBACKS_INIT(&Callbacks); + Callbacks.EvtDevicePrepareHardware = EvtPrepareHardware; + Callbacks.EvtDeviceReleaseHardware = EvtReleaseHardware; + Callbacks.EvtDeviceD0Entry = EvtDeviceD0Entry; + Callbacks.EvtDeviceD0Exit = EvtDeviceD0Exit; + + WdfDeviceInitSetPnpPowerEventCallbacks( DeviceInit, &Callbacks ); + + // + // Initialize all the properties specific to the device. + // Framework has default values for the one that are not + // set explicitly here. So please read the doc and make sure + // you are okay with the defaults. 
+ // + WdfDeviceInitSetDeviceType(DeviceInit, FILE_DEVICE_BUS_EXTENDER); + WdfDeviceInitSetExclusive(DeviceInit, TRUE); + + // + // Initialize attributes structure to specify size and accessor function + // for storing device context. + // + WDF_OBJECT_ATTRIBUTES_INIT_CONTEXT_TYPE(&attributes, FDO_DEVICE_DATA); + + // + // Create a framework device object. In response to this call, framework + // creates a WDM deviceobject. + // + status = WdfDeviceCreate(&DeviceInit, &attributes, &device); + if (!NT_SUCCESS(status)) { + MLX4_PRINT(TRACE_LEVEL_VERBOSE, MLX4_DBG_DRV, + ("WdfDeviceCreate failed with 0x%x\n", status)); + goto end; + } + + // + // Get the device context. + // + p_fdo = FdoGetData(device); + RtlZeroMemory(p_fdo, sizeof(FDO_DEVICE_DATA)); + p_fdo->FdoDevice = device; + + // + // Purpose of this lock is documented in PlugInDevice routine below. + // + WDF_OBJECT_ATTRIBUTES_INIT(&attributes); + attributes.ParentObject = device; + status = WdfWaitLockCreate(&attributes, &p_fdo->ChildLock); + if (!NT_SUCCESS(status)) { + MLX4_PRINT(TRACE_LEVEL_VERBOSE, MLX4_DBG_DRV, + ("WdfWaitLockCreate failed with 0x%x\n", status)); + goto end; + } + + // + // This value is used in responding to the IRP_MN_QUERY_BUS_INFORMATION + // for the child devices. This is an optional information provided to + // uniquely identify the bus the device is connected. + // + busInfo.BusTypeGuid = MLX4_BUS_TYPE_GUID; + busInfo.LegacyBusType = PNPBus; + busInfo.BusNumber = 0; + + WdfDeviceSetBusInformationForChildren(device, &busInfo); + + // + // WMI + // + status = WmiRegistration(device); + if (!NT_SUCCESS(status)) { + MLX4_PRINT(TRACE_LEVEL_VERBOSE, MLX4_DBG_DRV, + ("WmiRegistration failed with 0x%x\n", status)); + goto end; + } + +#ifndef USE_WDM_INTERRUPTS + + // + // create interrupt objects + // + for ( i = 0; i < MLX4_MAX_INTERRUPTS; ++i ) { + status = __create_interrupt( p_fdo->FdoDevice, i, EvtInterruptIsr, + NULL, p_fdo, &p_fdo->interrupt[i].WdfInterrupt ); + if (NT_SUCCESS(status)) + p_fdo->interrupt[i].valid = TRUE; + else { + MLX4_PRINT(TRACE_LEVEL_ERROR, MLX4_DBG_DRV, + ("WdfInterruptCreate failed %#x\n", status )); + goto end; + } + } + +#endif + + status = STATUS_SUCCESS; + +end: + MLX4_EXIT( MLX4_DBG_DRV ); + return status; +} + + + +void +EvtDriverUnload( + IN WDFDRIVER Driver + ) +{ + MLX4_ENTER( MLX4_DBG_DRV ); + + UNUSED_PARAM( Driver ); + + core_cleanup(); + + MLX4_EXIT( MLX4_DBG_DRV ); +#if defined(EVENT_TRACING) + WPP_CLEANUP(WdfDriverWdmGetDriverObject(Driver)); +#endif + +} + +static +NTSTATUS +__read_registry(WDFDRIVER *hDriver) +{ + DECLARE_CONST_UNICODE_STRING(debugLevel, L"DebugLevel"); + DECLARE_CONST_UNICODE_STRING(debugFlags, L"DebugFlags"); + + // "log maximum number of QPs per HCA" + DECLARE_CONST_UNICODE_STRING(numQp, L"LogNumQp"); + + // "log number of RDMARC buffers per QP" + DECLARE_CONST_UNICODE_STRING(numRdmaRc, L"LogNumRdmaRc"); + + // "log maximum number of SRQs per HCA" + DECLARE_CONST_UNICODE_STRING(numSrq, L"LogNumSrq"); + + // "log maximum number of CQs per HCA" + DECLARE_CONST_UNICODE_STRING(numCq, L"LogNumCq"); + + // "log maximum number of multicast groups per HCA" + DECLARE_CONST_UNICODE_STRING(numMcg, L"LogNumMcg"); + + // "log maximum number of memory protection table entries per HCA" + DECLARE_CONST_UNICODE_STRING(numMpt, L"LogNumMpt"); + + // "log maximum number of memory translation table segments per HCA" + DECLARE_CONST_UNICODE_STRING(numMtt, L"LogNumMtt"); + + // "Enable Quality of Service support in the HCA if > 0, (default 1)" + 
DECLARE_CONST_UNICODE_STRING(enableQoS, L"EnableQoS");
+
+	ULONG value;
+	WDFKEY hKey = NULL;
+	NTSTATUS status = STATUS_SUCCESS;
+
+	status = WdfDriverOpenParametersRegistryKey( *hDriver,
+		STANDARD_RIGHTS_ALL, WDF_NO_OBJECT_ATTRIBUTES, &hKey );
+
+	if (NT_SUCCESS (status)) {
+
+		//
+		// Read general values
+		//
+		status = WdfRegistryQueryULong(hKey, &debugLevel, &value);
+		if (NT_SUCCESS (status))
+			g_mlx4_dbg_level = g.DebugPrintLevel = value;
+
+		status = WdfRegistryQueryULong(hKey, &debugFlags, &value);
+		if (NT_SUCCESS (status))
+			g_mlx4_dbg_flags = g.DebugPrintFlags = value;
+
+		status = WdfRegistryQueryULong(hKey, &numQp, &value);
+		if (NT_SUCCESS (status))
+			g.mod_num_qp = value;
+
+		status = WdfRegistryQueryULong(hKey, &numRdmaRc, &value);
+		if (NT_SUCCESS (status))
+			g.mod_rdmarc_per_qp = value;
+
+		status = WdfRegistryQueryULong(hKey, &numSrq, &value);
+		if (NT_SUCCESS (status))
+			g.mod_num_srq = value;
+
+		status = WdfRegistryQueryULong(hKey, &numCq, &value);
+		if (NT_SUCCESS (status))
+			g.mod_num_cq = value;
+
+		status = WdfRegistryQueryULong(hKey, &numMcg, &value);
+		if (NT_SUCCESS (status))
+			g.mod_num_mcg = value;
+
+		status = WdfRegistryQueryULong(hKey, &numMpt, &value);
+		if (NT_SUCCESS (status))
+			g.mod_num_mpt = value;
+
+		status = WdfRegistryQueryULong(hKey, &numMtt, &value);
+		if (NT_SUCCESS (status))
+			g.mod_num_mtt = value;
+
+		status = WdfRegistryQueryULong(hKey, &enableQoS, &value);
+		if (NT_SUCCESS (status))
+			g.enable_qos = value;
+		else
+			g.enable_qos = 1;
+
+		WdfRegistryClose(hKey);
+		status = STATUS_SUCCESS;
+	}
+
+	return status;
+}
+
+NTSTATUS
+DriverEntry(
+	IN PDRIVER_OBJECT DriverObject,
+	IN PUNICODE_STRING RegistryPath
+	)
+/*++
+Routine Description:
+
+	Initialize the callbacks structure of the Driver Framework.
+
+Arguments:
+
+	DriverObject - pointer to the driver object
+
+	RegistryPath - pointer to a unicode string representing the path
+	to the driver-specific key in the registry.
+
+Return Value:
+
+	NT Status Code
+
+--*/
+{
+	int err;
+	WDF_DRIVER_CONFIG config;
+	NTSTATUS status;
+	WDFDRIVER hDriver;
+
+#if defined(EVENT_TRACING)
+	WPP_INIT_TRACING(DriverObject, RegistryPath);
+#endif
+
+
+	// global initializations
+	g_mlx4_dbg_level = g.DebugPrintLevel = TRACE_LEVEL_VERBOSE;
+	g_mlx4_dbg_flags = g.DebugPrintFlags = 0xffff;
+
+	MLX4_ENTER(MLX4_DBG_DRV);
+	MLX4_PRINT(TRACE_LEVEL_INFORMATION, MLX4_DBG_DRV,
+		("Built %s %s, Version %s, RelDate %s\n",
+		__DATE__, __TIME__, DRV_VERSION, DRV_RELDATE));
+
+	mlx4_net_init();
+	err = core_init();
+	if (err) {
+		status = errno_to_ntstatus(err);
+		goto end;
+	}
+
+	//
+	// Initialize driver config to control the attributes that
+	// are global to the driver. Note that the framework by default
+	// provides a driver unload routine. If you create any resources
+	// in DriverEntry and want them cleaned up in driver unload,
+	// you can override that by specifying one in the Config structure.
+	//
+
+	WDF_DRIVER_CONFIG_INIT(
+		&config, EvtDeviceAdd );
+	config.EvtDriverUnload = EvtDriverUnload;
+
+	//
+	// Create a framework driver object to represent our driver.
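+	// (Editorial note: the WDFDRIVER handle returned here is what
+	// __read_registry() above uses -- via WdfDriverOpenParametersRegistryKey --
+	// to open the driver's Parameters registry key, typically
+	// HKLM\SYSTEM\CurrentControlSet\Services\mlx4_bus\Parameters, from which
+	// the DebugLevel, LogNum* and EnableQoS values are read.)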
+	//
+	status = WdfDriverCreate(DriverObject,
+		RegistryPath, WDF_NO_OBJECT_ATTRIBUTES,
+		&config, &hDriver);
+
+	if (!NT_SUCCESS(status)) {
+		MLX4_PRINT(TRACE_LEVEL_VERBOSE, MLX4_DBG_DRV, ("WdfDriverCreate failed with status 0x%x\n", status));
+		goto end;
+	}
+
+	//
+	// read registry parameters
+	//
+	status = __read_registry(&hDriver);
+
+	// a failure to read the registry parameters is not fatal -- keep the defaults
+	status = STATUS_SUCCESS;
+
+end:
+	MLX4_EXIT( MLX4_DBG_DRV );
+	return status;
+
+}
+
+
diff --git a/branches/ConnectX/hw/mlx4/kernel/bus/drv.h b/branches/ConnectX/hw/mlx4/kernel/bus/drv.h
new file mode 100644
index 00000000..64d7dbd2
--- /dev/null
+++ b/branches/ConnectX/hw/mlx4/kernel/bus/drv.h
@@ -0,0 +1,221 @@
+/*++
+
+Copyright (c) 2003 Microsoft Corporation All Rights Reserved
+
+Module Name:
+
+	mxe_drv.h
+
+Abstract:
+
+	This module contains the common private declarations
+	for the Mxe Bus enumerator.
+
+Environment:
+
+	kernel mode only
+
+--*/
+
+#pragma once
+
+#define BUSENUM_POOL_TAG (ULONG) 'suBT'
+#define N_BARS 3
+
+#include "net\mlx4.h"
+#include "bus_intf.h"
+
+#if DBG
+#define BUS_DEFAULT_DEBUG_OUTPUT_LEVEL 0x000FFFFF
+
+#else
+
+#define BUS_DEFAULT_DEBUG_OUTPUT_LEVEL 0x0
+
+#endif
+
+#define BUSRESOURCENAME L"MofResourceName"
+
+#ifndef min
+#define min(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
+#endif
+
+#ifndef max
+#define max(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
+#endif
+
+
+#define MLX4_MAX_INTERRUPTS MLX4_NUM_EQ
+
+typedef struct {
+	WDFINTERRUPT WdfInterrupt;
+	BOOLEAN valid;
+} res_interrupt_t;
+
+//
+// The device extension of the bus itself. From whence the PDO's are born.
+//
+
+typedef struct _FDO_DEVICE_DATA
+{
+	BUS_WMI_STD_DATA WmiData;
+	WDFWAITLOCK ChildLock;
+	WDFDEVICE FdoDevice;
+	struct pci_dev pci_dev;
+	int pci_bus_ifc_taken;
+	WDFDMAENABLER dma_enabler;
+	int dma_adapter_taken;
+	res_interrupt_t interrupt[MLX4_MAX_INTERRUPTS];
+	MLX4_BUS_IB_INTERFACE bus_ib_ifc;
+
+} FDO_DEVICE_DATA, *PFDO_DEVICE_DATA;
+
+
+WDF_DECLARE_CONTEXT_TYPE_WITH_NAME(FDO_DEVICE_DATA, FdoGetData)
+
+//
+// The device extension for the PDOs.
+// That's of the mxe device which this bus driver enumerates.
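+// (Editorial note: one instance of this context is allocated for each child
+// PDO created by create_pdo() in pdo.c; qiMlx4Bus and qiPciBus appear to be
+// the query-interface configurations the PDO exposes to the HCA driver --
+// see MLX4_BUS_IB_INTERFACE_GUID in inc\public.h.)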
+// + +typedef struct _PDO_DEVICE_DATA +{ + // Unique serial number of the device on the bus + ULONG SerialNo; + // WDF PDO object + WDFDEVICE PdoDevice; + // FDO context + PFDO_DEVICE_DATA p_fdo; + // MLX4 BUS IB interface + WDF_QUERY_INTERFACE_CONFIG qiMlx4Bus; + WDF_QUERY_INTERFACE_CONFIG qiPciBus; + +} PDO_DEVICE_DATA, *PPDO_DEVICE_DATA; + +WDF_DECLARE_CONTEXT_TYPE_WITH_NAME(PDO_DEVICE_DATA, PdoGetData) + + +typedef struct _QUEUE_DATA +{ + PFDO_DEVICE_DATA FdoData; + +} QUEUE_DATA, *PQUEUE_DATA; + +WDF_DECLARE_CONTEXT_TYPE_WITH_NAME(QUEUE_DATA, QueueGetData) + + // +// wmi.c +// + +NTSTATUS +WmiRegistration( + WDFDEVICE Device +); + +NTSTATUS +EvtStdDataSetItem( + IN WDFWMIINSTANCE WmiInstance, + IN ULONG DataItemId, + IN ULONG InBufferSize, + IN PVOID InBuffer + ); + +NTSTATUS +EvtStdDataSetInstance( + IN WDFWMIINSTANCE WmiInstance, + IN ULONG InBufferSize, + IN PVOID InBuffer + ); + +NTSTATUS +EvtStdDataQueryInstance( + IN WDFWMIINSTANCE WmiInstance, + IN ULONG OutBufferSize, + IN PVOID OutBuffer, + OUT PULONG BufferUsed + ); + + +// +// drv.c +// + +NTSTATUS +DriverEntry( + IN PDRIVER_OBJECT DriverObject, + IN PUNICODE_STRING RegistryPath + ); + +void +EvtDriverUnload( + IN WDFDRIVER Driver + ); + +NTSTATUS +EvtDeviceAdd( + IN WDFDRIVER Driver, + IN PWDFDEVICE_INIT DeviceInit + ); + +NTSTATUS +EvtPrepareHardware( + IN WDFDEVICE Device, + IN WDFCMRESLIST ResourcesRaw, + IN WDFCMRESLIST ResourcesTranslated + ); + +NTSTATUS +EvtReleaseHardware( + IN WDFDEVICE Device, + IN WDFCMRESLIST ResourcesTranslated + ); + +NTSTATUS +EvtDeviceD0Exit( + IN WDFDEVICE Device, + IN WDF_POWER_DEVICE_STATE TargetState + ); + +NTSTATUS +EvtDeviceD0Entry( + IN WDFDEVICE Device, + IN WDF_POWER_DEVICE_STATE PreviousState + ); + + +// +// pci.c +// + +NTSTATUS +pci_save_config( + IN BUS_INTERFACE_STANDARD *pBusIfc, + OUT PCI_COMMON_CONFIG* const pConfig ); + +NTSTATUS +pci_hca_reset( + IN struct pci_dev *pdev +); + +void +pci_get_uplink_info( + IN PCI_COMMON_CONFIG * p_cfg, + OUT uplink_info_t * p_uplink_info ); + +NTSTATUS +pci_hca_enable( + IN PBUS_INTERFACE_STANDARD p_ifc, + IN PCI_COMMON_CONFIG* p_cfg + ); + +// +// pdo.c +// + +NTSTATUS +create_pdo( + __in WDFDEVICE Device, + __in PWCHAR HardwareIds, + __in ULONG SerialNo +); + diff --git a/branches/ConnectX/hw/mlx4/kernel/bus/makefile b/branches/ConnectX/hw/mlx4/kernel/bus/makefile new file mode 100644 index 00000000..7d80ec72 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/bus/makefile @@ -0,0 +1,8 @@ +# +# DO NOT EDIT THIS FILE!!! Edit .\sources. if you want to add a new source +# file to this component. 
This file merely indirects to the real make file +# that is shared by all the driver components of the Windows NT DDK +# +!INCLUDE ..\..\..\..\inc\openib.def + + diff --git a/branches/ConnectX/hw/mlx4/kernel/bus/makefile.inc b/branches/ConnectX/hw/mlx4/kernel/bus/makefile.inc new file mode 100644 index 00000000..6f8b1a66 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/bus/makefile.inc @@ -0,0 +1,10 @@ +mofcomp: mlx4_bus.bmf + +mlx4_bus.bmf: bus.mof + mofcomp -B:$(OBJ_PATH)\$O\mlx4_bus.bmf bus.mof + wmimofck $(OBJ_PATH)\$O\mlx4_bus.bmf + + + + + diff --git a/branches/ConnectX/hw/mlx4/kernel/bus/mlx4_bus.cdf b/branches/ConnectX/hw/mlx4/kernel/bus/mlx4_bus.cdf new file mode 100644 index 00000000..032c5d7b --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/bus/mlx4_bus.cdf @@ -0,0 +1,9 @@ +[CatalogHeader] +Name=mlx4_bus.cat +PublicVersion=0x0000001 +EncodingType=0x00010001 +CATATTR1=0x10010001:OSAttr:2:6.0 +[CatalogFiles] +mlx4_bus.inf=mlx4_bus.inf +mlx4_bus.sys=mlx4_bus.sys +WdfCoInstaller01005.dll=WdfCoInstaller01005.dll diff --git a/branches/ConnectX/hw/mlx4/kernel/bus/mlx4_bus.inf b/branches/ConnectX/hw/mlx4/kernel/bus/mlx4_bus.inf new file mode 100644 index 00000000..2bff3c58 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/bus/mlx4_bus.inf @@ -0,0 +1,211 @@ +; Mellanox Technologies InfiniBand HCAs. +; Copyright 2008 Mellanox Technologies all Rights Reserved. + +[Version] +Signature="$WINDOWS NT$" +Class=Mlx4Bus +ClassGUID={714995B2-CD65-4a47-BCFE-95AC73A0D780} +Provider=%MTL% +; must be synchronized with bus\drv.c +DriverVer=02/01/2008,1.0.0.0 +CatalogFile=mlx4_bus.cat + + +;***************************************** +; Destination directory section +;***************************************** + +[DestinationDirs] +DefaultDestDir = 12 +Wdf_CoInstaller_CopyFiles = 11 + + +;***************************************** +; Class Install section +;***************************************** + +[ClassInstall32] +AddReg=ClassAddReg + +[ClassAddReg] +HKR,,,,"Mellanox ConnectX Adapters" +HKR,,Icon,,-5 +HKR,,SilentInstall,,1 + + +;***************************************** +; Device Install section +;***************************************** + +[SourceDisksNames.x86] +1=%DiskId%,,,"" + +[SourceDisksNames.amd64] +1=%DiskId%,,,"" + +[SourceDisksNames.ia64] +1=%DiskId%,,,"" + +[SourceDisksFiles.x86] +mlx4_bus.sys = 1,, +wdfcoinstaller01005.dll = 1,, + +[SourceDisksFiles.amd64] +mlx4_bus.sys = 1,, +wdfcoinstaller01005.dll = 1,, + +[SourceDisksFiles.ia64] +mlx4_bus.sys = 1,, +wdfcoinstaller01005.dll = 1,, + +;***************************************** +; Mlx4Bus Install Section +;***************************************** + +[Manufacturer] +%MTL% = MLX4BUS.DeviceSection,ntx86,ntamd64,ntia64 + +[MLX4BUS.DeviceSection] +; empty since we don't support W9x/Me + +[MLX4BUS.DeviceSection.ntx86] +%MT25408.DeviceDesc%=MLX4BUS.DDInstall, PCI\VEN_15B3&DEV_6340 +%MT25418.DeviceDesc%=MLX4BUS.DDInstall, PCI\VEN_15B3&DEV_634A +%MT25428.DeviceDesc%=MLX4BUS.DDInstall, PCI\VEN_15B3&DEV_6354 +%MT26418.DeviceDesc%=MLX4BUS.DDInstall, PCI\VEN_15B3&DEV_6732 +%MT26428.DeviceDesc%=MLX4BUS.DDInstall, PCI\VEN_15B3&DEV_673c +%MT00401.DeviceDesc%=MLX4BUS.DDInstall, PCI\VEN_15B3&DEV_0191 + +[MLX4BUS.DeviceSection.ntamd64] +%MT25408.DeviceDesc%=MLX4BUS.DDInstall, PCI\VEN_15B3&DEV_6340 +%MT25418.DeviceDesc%=MLX4BUS.DDInstall, PCI\VEN_15B3&DEV_634A +%MT25428.DeviceDesc%=MLX4BUS.DDInstall, PCI\VEN_15B3&DEV_6354 +%MT25448.DeviceDesc%=MLX4BUS.DDInstall, PCI\VEN_15B3&DEV_6368 +%MT26418.DeviceDesc%=MLX4BUS.DDInstall, PCI\VEN_15B3&DEV_6732 
+%MT26428.DeviceDesc%=MLX4BUS.DDInstall, PCI\VEN_15B3&DEV_673c +%MT00401.DeviceDesc%=MLX4BUS.DDInstall, PCI\VEN_15B3&DEV_0191 + +[MLX4BUS.DeviceSection.ntia64] +%MT25408.DeviceDesc%=MLX4BUS.DDInstall, PCI\VEN_15B3&DEV_6340 +%MT25418.DeviceDesc%=MLX4BUS.DDInstall, PCI\VEN_15B3&DEV_634A +%MT25428.DeviceDesc%=MLX4BUS.DDInstall, PCI\VEN_15B3&DEV_6354 +%MT26418.DeviceDesc%=MLX4BUS.DDInstall, PCI\VEN_15B3&DEV_6732 +%MT26428.DeviceDesc%=MLX4BUS.DDInstall, PCI\VEN_15B3&DEV_673c +%MT00401.DeviceDesc%=MLX4BUS.DDInstall, PCI\VEN_15B3&DEV_0191 + +[MLX4BUS.DDInstall.ntx86] +CopyFiles = MLX4BUS.CopyFiles + +[MLX4BUS.DDInstall.ntamd64] +CopyFiles = MLX4BUS.CopyFiles + +[MLX4BUS.DDInstall.ntia64] +CopyFiles = MLX4BUS.CopyFiles + +[MLX4BUS.DDInstall.ntx86.Services] +AddService = mlx4_bus,%SPSVCINST_ASSOCSERVICE%,MLX4BUS.ServiceInstall,MLX4BUS.EventLog + +[MLX4BUS.DDInstall.ntamd64.Services] +AddService = mlx4_bus,%SPSVCINST_ASSOCSERVICE%,MLX4BUS.ServiceInstall,MLX4BUS.EventLog + +[MLX4BUS.DDInstall.ntia64.Services] +AddService = mlx4_bus,%SPSVCINST_ASSOCSERVICE%,MLX4BUS.ServiceInstall,MLX4BUS.EventLog + +[MLX4BUS.CopyFiles] +mlx4_bus.sys + + +;***************************************** +; Service Install section +;***************************************** + +[MLX4BUS.ServiceInstall] +DisplayName = %MLX4BUS.ServiceDesc% +ServiceType = %SERVICE_KERNEL_DRIVER% +StartType = %SERVICE_DEMAND_START% +ErrorControl = %SERVICE_ERROR_NORMAL% +ServiceBinary = %12%\mlx4_bus.sys +LoadOrderGroup = extended base +AddReg = MLX4BUS.ParamsReg + +[MLX4BUS.EventLog] +AddReg = MLX4BUS.AddEventLogReg + +[MLX4BUS.AddEventLogReg] +HKR, , EventMessageFile, 0x00020000, "%%SystemRoot%%\System32\IoLogMsg.dll;%%SystemRoot%%\System32\drivers\mlx4_bus.sys" +HKR, , TypesSupported, 0x00010001, 7 + +[MLX4BUS.ParamsReg] +HKR,,DeviceCharacteristics,0x10001,0x0100 ; Use same security checks on relative opens +HKR,,Security,,"D:P(A;;GA;;;BA)(A;;GA;;;SY)" ; Allow generic-all access to Built-in administrators and Local system +HKR,"Parameters","DebugLevel",%REG_DWORD%,0x00000003 +HKR,"Parameters","DebugFlags",%REG_DWORD%,0x0000ffff +HKR,"Parameters","LogNumQp",%REG_DWORD%,0x00000011 +HKR,"Parameters","LogNumRdmaRc",%REG_DWORD%,0x00000004 +HKR,"Parameters","LogNumSrq",%REG_DWORD%,0x00000010 +HKR,"Parameters","LogNumCq",%REG_DWORD%,0x00000010 +HKR,"Parameters","LogNumMcg",%REG_DWORD%,0x0000000D +HKR,"Parameters","LogNumMpt",%REG_DWORD%,0x00000011 +HKR,"Parameters","LogNumMtt",%REG_DWORD%,0x00000014 +HKR,"Parameters","EnableQoS",%REG_DWORD%,0x00000001 + +HKLM,"System\CurrentControlSet\Control\WMI\GlobalLogger\E51BB6E2-914A-4e21-93C0-192F4801BBFF","Flags",%REG_DWORD%,0xffff +HKLM,"System\CurrentControlSet\Control\WMI\GlobalLogger\E51BB6E2-914A-4e21-93C0-192F4801BBFF","Level",%REG_DWORD%,0x3 + +;***************************************** +; WDF Coinstaller installation section +;***************************************** + +[MLX4BUS.DDInstall.ntx86.CoInstallers] +AddReg=Wdf_CoInstaller_AddReg +CopyFiles=Wdf_CoInstaller_CopyFiles + +[MLX4BUS.DDInstall.ntamd64.CoInstallers] +AddReg=Wdf_CoInstaller_AddReg +CopyFiles=Wdf_CoInstaller_CopyFiles + +[MLX4BUS.DDInstall.ntia64.CoInstallers] +AddReg=Wdf_CoInstaller_AddReg +CopyFiles=Wdf_CoInstaller_CopyFiles + +[Wdf_CoInstaller_AddReg] +HKR,,CoInstallers32,0x00010000, "wdfcoinstaller01005.dll,WdfCoInstaller" + +[Wdf_CoInstaller_CopyFiles] +wdfcoinstaller01005.dll + +[MLX4BUS.DDInstall.ntx86.Wdf] +KmdfService = mlx4_bus, mlx4_bus_wdfsect + +[MLX4BUS.DDInstall.ntamd64.Wdf] +KmdfService = mlx4_bus, mlx4_bus_wdfsect + 
+[MLX4BUS.DDInstall.ntia64.Wdf] +KmdfService = mlx4_bus, mlx4_bus_wdfsect + +[mlx4_bus_wdfsect] +KmdfLibraryVersion = 1.5 + + +;***************************************** +; Strings +;***************************************** + +[Strings] +MTL="Mellanox Technologies Ltd." +MLX4BUS.ServiceDesc = "Mellanox ConnectX Bus Enumerator" +MT25408.DeviceDesc="ConnectX (MT25408) - Mellanox ConnectX SDR Channel Adapter" +MT25418.DeviceDesc="ConnectX (MT25418) - Mellanox ConnectX DDR Channel Adapter" +MT25428.DeviceDesc="ConnectX (MT25428) - Mellanox ConnectX QDR Channel Adapter" +MT25448.DeviceDesc="ConnectX (MT25448) - Mellanox ConnectX QDR Channel Adapter" +MT26418.DeviceDesc="ConnectX (MT26418) - Mellanox ConnectX DDR_G2 Channel Adapter" +MT26428.DeviceDesc="ConnectX (MT26428) - Mellanox ConnectX QDR_G2 Channel Adapter" +MT00401.DeviceDesc="ConnectX (MT00401) - Mellanox ConnectX Channel Adapter in Burning Mode" +DiskId = "Mellanox Mlx4 Bus installation disk" +SPSVCINST_NULL = 0x0 +SPSVCINST_ASSOCSERVICE = 0x00000002 +SERVICE_KERNEL_DRIVER = 1 +SERVICE_DEMAND_START = 3 +SERVICE_ERROR_NORMAL = 1 +REG_DWORD = 0x00010001 +REG_MULTI_SZ_APPEND = 0x00010008 diff --git a/branches/ConnectX/hw/mlx4/kernel/bus/mlx4_bus32.cdf b/branches/ConnectX/hw/mlx4/kernel/bus/mlx4_bus32.cdf new file mode 100644 index 00000000..1dfd8229 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/bus/mlx4_bus32.cdf @@ -0,0 +1,10 @@ +[CatalogHeader] +Name=mlx4_bus.cat +PublicVersion=0x0000001 +EncodingType=0x00010001 +CATATTR1=0x10010001:OSAttr:2:6.0 +[CatalogFiles] +mlx4_bus.inf=mlx4_bus.inf +mlx4_bus.sys=mlx4_bus.sys +WdfCoInstaller01005.dll=WdfCoInstaller01005.dll + diff --git a/branches/ConnectX/hw/mlx4/kernel/bus/pci.c b/branches/ConnectX/hw/mlx4/kernel/bus/pci.c new file mode 100644 index 00000000..a4ce1159 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/bus/pci.c @@ -0,0 +1,468 @@ + +#include "precomp.h" + +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "pci.tmh" +#endif + +#include +#include +#include + +#define MLX4_RESET_BASE 0xf0000 +#define MLX4_RESET_SIZE 0x400 +#define MLX4_SEM_OFFSET 0x3fc +#define MLX4_RESET_OFFSET 0x10 +#define MLX4_RESET_VALUE swab32(1) + +#define MLX4_SEM_TIMEOUT_JIFFIES (10 * HZ) +#define MLX4_RESET_TIMEOUT_JIFFIES (2 * HZ) + +#define PCI_CAPABILITY_ID_VPD 0x03 +#define PCI_CAPABILITY_ID_PCIX 0x07 +#define PCI_CAPABILITY_ID_PCIEXP 0x10 + + +/* + * Vital Product Data Capability + */ +typedef struct _PCI_VPD_CAPABILITY { + + PCI_CAPABILITIES_HEADER Header; + + USHORT Flags; + ULONG Data; + +} PCI_VPD_CAPABILITY, *PPCI_VPD_CAPABILITY; + + +/* + * PCI-X Capability + */ +typedef struct _PCI_PCIX_CAPABILITY { + + PCI_CAPABILITIES_HEADER Header; + + USHORT Command; + ULONG Status; + +/* for Command: */ +} PCI_PCIX_CAPABILITY, *PPCI_PCIX_CAPABILITY; + +#define PCI_X_CMD_MAX_READ 0x000c /* Max Memory Read Byte Count */ + +/* + * PCI-Express Capability + */ +typedef struct _PCI_PCIEXP_CAPABILITY { + + PCI_CAPABILITIES_HEADER Header; + + USHORT Flags; + ULONG DevCapabilities; + USHORT DevControl; + USHORT DevStatus; + ULONG LinkCapabilities; + USHORT LinkControl; + USHORT LinkStatus; + ULONG SlotCapabilities; + USHORT SlotControl; + USHORT SlotStatus; + USHORT RootControl; + USHORT RootCapabilities; + USHORT RootStatus; +} PCI_PCIEXP_CAPABILITY, *PPCI_PCIEXP_CAPABILITY; + +/* for DevControl: */ +#define PCI_EXP_DEVCTL_READRQ 0x7000 /* Max_Read_Request_Size */ + +static NTSTATUS +__get_bus_ifc( + IN DEVICE_OBJECT* const pDevObj, + IN const GUID* const pGuid, + OUT 
BUS_INTERFACE_STANDARD *pBusIfc ); + +static NTSTATUS +__restore_pci_config( + IN BUS_INTERFACE_STANDARD *pBusIfc, + IN PCI_COMMON_CONFIG* const pConfig ); + + +#ifdef ALLOC_PRAGMA +#pragma alloc_text (PAGE, __get_bus_ifc) +#pragma alloc_text (PAGE, __restore_pci_config) +#endif + +/* + * Returns the offset in configuration space of the PCI-X capabilites. + */ +static ULONG +__find_capability( + IN PCI_COMMON_CONFIG* const pConfig, + IN char cap_id + ) +{ + ULONG offset = 0; + PCI_CAPABILITIES_HEADER *pHdr = NULL; + UCHAR *pBuf = (UCHAR*)pConfig; + + MLX4_ENTER( MLX4_DBG_PNP ); + + if ( pConfig->HeaderType == PCI_DEVICE_TYPE ) { + if( pConfig->u.type0.CapabilitiesPtr ) + { + pHdr = (PCI_CAPABILITIES_HEADER*) + (pBuf + pConfig->u.type0.CapabilitiesPtr); + } + } + + if ( pConfig->HeaderType == PCI_BRIDGE_TYPE ) { + if( pConfig->u.type1.CapabilitiesPtr ) + { + pHdr = (PCI_CAPABILITIES_HEADER*) + (pBuf + pConfig->u.type1.CapabilitiesPtr); + } + } + + /* + * Fix up any fields that might cause changes to the + * device - like writing VPD data. + */ + while( pHdr ) + { + if( pHdr->CapabilityID == cap_id ) + { + offset = (UCHAR)(((ULONG_PTR)pHdr) - ((ULONG_PTR)pConfig)); + break; + } + + if( pHdr->Next ) + pHdr = (PCI_CAPABILITIES_HEADER*)(pBuf + pHdr->Next); + else + pHdr = NULL; + } + + MLX4_EXIT( MLX4_DBG_PNP ); + return offset; +} + +/* + * Restore saved PCI configuration, skipping registers 22 and 23, as well + * as any registers where writing will have side effects such as the flags + * field of the VPD and vendor specific capabilities. The function also delays + * writing the command register, bridge control register (if applicable), and + * PCIX command register (if present). + */ +static NTSTATUS +__restore_pci_config( + IN BUS_INTERFACE_STANDARD *pBusIfc, + IN PCI_COMMON_CONFIG* const pConfig ) +{ + NTSTATUS status = STATUS_SUCCESS; + int i, *pci_hdr = (int*)pConfig; + int hca_pcix_cap = 0; + + MLX4_ENTER( MLX4_DBG_PNP ); + + /* get capabilities */ + hca_pcix_cap = __find_capability( pConfig, PCI_CAPABILITY_ID_PCIX ); + + /* restore capabilities*/ + { + int hca_pcie_cap = __find_capability( pConfig, PCI_CAPABILITY_ID_PCIEXP ); + PCI_PCIEXP_CAPABILITY *pPciExpCap = (PCI_PCIEXP_CAPABILITY*)(((UCHAR*)pConfig) + hca_pcie_cap); + + if (hca_pcix_cap) { + if ( 4 != pBusIfc->SetBusData( pBusIfc->Context, PCI_WHICHSPACE_CONFIG, + &pci_hdr[hca_pcix_cap/4], hca_pcix_cap, 4) ) { + MLX4_PRINT( TRACE_LEVEL_ERROR, MLX4_DBG_PNP, + ("Couldn't restore HCA PCI-X command register, aborting.\n")); + status = STATUS_UNSUCCESSFUL; + goto out; + } + } + + if (hca_pcie_cap) { + /* restore HCA PCI Express Device Control register */ + if ( sizeof( pPciExpCap->DevControl ) != pBusIfc->SetBusData( + pBusIfc->Context, PCI_WHICHSPACE_CONFIG, + &pPciExpCap->DevControl, hca_pcie_cap + + offsetof( PCI_PCIEXP_CAPABILITY, DevControl), + sizeof( pPciExpCap->DevControl ) )) { + MLX4_PRINT( TRACE_LEVEL_ERROR, MLX4_DBG_PNP, + ("Couldn't restore HCA PCI Express Device Control register, aborting.\n")); + status = STATUS_UNSUCCESSFUL; + goto out; + } + /* restore HCA PCI Express Link Control register */ + if ( sizeof( pPciExpCap->LinkControl ) != pBusIfc->SetBusData( + pBusIfc->Context, PCI_WHICHSPACE_CONFIG, + &pPciExpCap->LinkControl, hca_pcie_cap + + offsetof( PCI_PCIEXP_CAPABILITY, LinkControl), + sizeof( pPciExpCap->LinkControl ) )) { + MLX4_PRINT( TRACE_LEVEL_ERROR, MLX4_DBG_PNP, + ("Couldn't restore HCA PCI Express Link Control register, aborting.\n")); + status = STATUS_UNSUCCESSFUL; + goto out; + } + } + } + + /* write 
basic part */ + for (i = 0; i < 16; ++i) { + if (i == 1) + continue; + + if (4 != pBusIfc->SetBusData( pBusIfc->Context, + PCI_WHICHSPACE_CONFIG, &pci_hdr[i], i * 4, 4 )) { + MLX4_PRINT( TRACE_LEVEL_ERROR ,MLX4_DBG_PNP , + ("Couldn't restore PCI cfg reg %x, aborting.\n", i)); + status = STATUS_DEVICE_NOT_READY; + goto out; + } + } + + /* Write the command register. */ + if (4 != pBusIfc->SetBusData( pBusIfc->Context, + PCI_WHICHSPACE_CONFIG, &pci_hdr[1], 4, 4 )) { + MLX4_PRINT( TRACE_LEVEL_ERROR ,MLX4_DBG_PNP ,("Couldn't restore COMMAND.\n")); + status = STATUS_DEVICE_NOT_READY; + } + +out: + MLX4_EXIT( MLX4_DBG_PNP ); + return status; +} + +/* + * Reads and saves the PCI configuration of the device accessible + * through the provided bus interface. Does not read registers 22 or 23 + * as directed in Tavor PRM 1.0.1, Appendix A. InfiniHost Software Reset. + */ +NTSTATUS +pci_save_config( + IN BUS_INTERFACE_STANDARD *pBusIfc, + OUT PCI_COMMON_CONFIG* const pConfig ) +{ + ULONG len; + UINT32 *pBuf; + + MLX4_ENTER( MLX4_DBG_PNP ); + + pBuf = (UINT32*)pConfig; + + /* + * Read the lower portion of the configuration, up to but excluding + * register 22. + */ + len = pBusIfc->GetBusData( + pBusIfc->Context, PCI_WHICHSPACE_CONFIG, &pBuf[0], 0, 88 ); + if( len != 88 ) + { + MLX4_PRINT( TRACE_LEVEL_ERROR , MLX4_DBG_PNP ,("Failed to read HCA config.\n")); + return STATUS_DEVICE_NOT_READY; + } + + /* Read the upper portion of the configuration, from register 24. */ + len = pBusIfc->GetBusData( + pBusIfc->Context, PCI_WHICHSPACE_CONFIG, &pBuf[24], 96, 160 ); + if( len != 160 ) + { + MLX4_PRINT( TRACE_LEVEL_ERROR ,MLX4_DBG_PNP ,("Failed to read HCA config.\n")); + return STATUS_DEVICE_NOT_READY; + } + + MLX4_EXIT( MLX4_DBG_PNP ); + return STATUS_SUCCESS; +} + + +NTSTATUS +pci_hca_reset( + IN struct pci_dev *pdev +) +{ + u32 sem; + NTSTATUS status = STATUS_SUCCESS; + PBUS_INTERFACE_STANDARD p_ifc = &pdev->bus_pci_ifc; + PCI_COMMON_CONFIG* p_cfg = &pdev->pci_cfg_space; + + MLX4_ENTER( MLX4_DBG_PNP ); + + { + u64 end; + PUCHAR p_reset; + PHYSICAL_ADDRESS pa; + int cnt = 0; + + /* map reset register */ + pa.QuadPart = pdev->bar[HCA_BAR_TYPE_HCR].phys + (uint64_t)MLX4_RESET_BASE; + p_reset = MmMapIoSpace( pa, MLX4_RESET_SIZE, MmNonCached ); + MLX4_PRINT( TRACE_LEVEL_INFORMATION ,MLX4_DBG_PNP , + ("Reset area ia mapped from pa 0x%I64x to va %p, size %#x\n", + pa.QuadPart, p_reset, MLX4_RESET_SIZE)); + if( !p_reset ) { + MLX4_PRINT( TRACE_LEVEL_ERROR ,MLX4_DBG_PNP ,("Failed to map reset register with address 0x%I64x\n", pa.QuadPart)); + status = STATUS_UNSUCCESSFUL; + goto err; + } + + /* grab HW semaphore to lock out flash updates f0014 - dev_id 00a00190 */ + end = jiffies + MLX4_SEM_TIMEOUT_JIFFIES; + MLX4_PRINT( TRACE_LEVEL_INFORMATION ,MLX4_DBG_PNP , + ("Obtaining HW semaphore at %p till %I64d\n", p_reset + MLX4_SEM_OFFSET, end)); + do { + sem = READ_REGISTER_ULONG((volatile ULONG*)(p_reset + MLX4_SEM_OFFSET)); + if (!sem) + break; + + cl_thread_suspend(1); + } while (time_before(jiffies, end) || ++cnt < 100); + + if (sem) { + MLX4_PRINT( TRACE_LEVEL_INFORMATION ,MLX4_DBG_PNP , + ("Failed to obtain HW semaphore in %d attemps till %I64d, aborting\n", + cnt, jiffies)); + status = STATUS_UNSUCCESSFUL; + MmUnmapIoSpace( p_reset, MLX4_RESET_SIZE ); + goto err; + } + + + /* Issue the reset. 
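+	 * (MLX4_RESET_VALUE is swab32(1), i.e. a big-endian 1 as the device
+	 * expects. It is written at MLX4_RESET_BASE + MLX4_RESET_OFFSET, while
+	 * the semaphore polled above at MLX4_SEM_OFFSET keeps flash/firmware
+	 * tools away from the device for the duration of the reset.)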
*/ + MLX4_PRINT( TRACE_LEVEL_INFORMATION ,MLX4_DBG_PNP , + ("Resetting the chip at %p with %#x...\n", p_reset + MLX4_RESET_OFFSET, MLX4_RESET_VALUE)); + WRITE_REGISTER_ULONG( (volatile ULONG*)(p_reset + MLX4_RESET_OFFSET), MLX4_RESET_VALUE ); + + /* unmap the reset register */ + MLX4_PRINT( TRACE_LEVEL_INFORMATION ,MLX4_DBG_PNP ,("Unmapping reset register \n")); + MmUnmapIoSpace( p_reset, MLX4_RESET_SIZE ); + + /* Wait a second. */ + cl_thread_suspend( 1000 ); + } + + /* Read the configuration register until it doesn't return 0xFFFFFFFF */ + { + ULONG data, i, reset_failed = 1; + MLX4_PRINT( TRACE_LEVEL_INFORMATION ,MLX4_DBG_PNP ,("Read the configuration register \n")); + for( i = 0; i < 100; i++ ) { + if (4 != p_ifc->GetBusData( p_ifc->Context, + PCI_WHICHSPACE_CONFIG, &data, 0, 4)) { + MLX4_PRINT( TRACE_LEVEL_ERROR, MLX4_DBG_PNP, + ("Failed to read device configuration data. Card reset failed !\n")); + status = STATUS_UNSUCCESSFUL; + break; + } + /* See if we got valid data. */ + if( data != 0xFFFFFFFF ) { + reset_failed = 0; + break; + } + + cl_thread_suspend( 100 ); + } + + if (reset_failed) { + MLX4_PRINT( TRACE_LEVEL_ERROR, MLX4_DBG_PNP, + ("Doh! PCI device did not come back after reset!\n")); + status = STATUS_UNSUCCESSFUL; + goto err; + } + } + + /* restore the HCA's PCI configuration headers */ + { + /* Restore the HCA's configuration. */ + MLX4_PRINT( TRACE_LEVEL_INFORMATION ,MLX4_DBG_PNP ,("Restoring HCA PCI configuration \n")); + status = __restore_pci_config( p_ifc, p_cfg ); + if( !NT_SUCCESS( status ) ) { + MLX4_PRINT( TRACE_LEVEL_ERROR, MLX4_DBG_PNP, + ("Failed to restore HCA config. Card reset failed !\n")); + goto err; + } + } + + MLX4_PRINT( TRACE_LEVEL_ERROR ,MLX4_DBG_PNP , ("HCA is reset ! \n")); + + status = STATUS_SUCCESS; + +err: + MLX4_EXIT( MLX4_DBG_PNP ); + return status; +} + + +/* + * Tunes PCI configuration as described in 13.3.2 in the Tavor PRM. 
+ */ +void +pci_get_uplink_info( + IN PCI_COMMON_CONFIG * p_cfg, + OUT uplink_info_t * p_uplink_info ) +{ + ULONG capOffset; + PCI_PCIX_CAPABILITY *pPciXCap; + PCI_PCIEXP_CAPABILITY *pPciExpCap; + + MLX4_ENTER( MLX4_DBG_PNP ); + + // PCIX Capability + capOffset = __find_capability( p_cfg, PCI_CAPABILITY_ID_PCIX ); + if( capOffset ) { + pPciXCap = (PCI_PCIX_CAPABILITY*)(((UCHAR*)p_cfg) + capOffset); + + p_uplink_info->bus_type = UPLINK_BUS_PCIX; + if (pPciXCap->Status & (1 << 17)) + p_uplink_info->u.pci_x.capabilities = UPLINK_BUS_PCIX_133; + + } + + // PCI Express Capability + capOffset = __find_capability( p_cfg, PCI_CAPABILITY_ID_PCIEXP ); + if( capOffset ) { + pPciExpCap = (PCI_PCIEXP_CAPABILITY*)(((UCHAR*)p_cfg) + capOffset); + + p_uplink_info->bus_type = UPLINK_BUS_PCIE; + if ((pPciExpCap->LinkStatus & 15) == 1) + p_uplink_info->u.pci_e.link_speed = UPLINK_BUS_PCIE_SDR; + if ((pPciExpCap->LinkStatus & 15) == 2) + p_uplink_info->u.pci_e.link_speed = UPLINK_BUS_PCIE_DDR; + p_uplink_info->u.pci_e.link_width = (uint8_t)((pPciExpCap->LinkStatus >> 4) & 0x03f); + p_uplink_info->u.pci_e.capabilities = (uint8_t)((pPciExpCap->LinkCapabilities >> 2) & 0xfc); + p_uplink_info->u.pci_e.capabilities |= pPciExpCap->LinkCapabilities & 3; + } + + MLX4_EXIT( MLX4_DBG_PNP ); +} + +NTSTATUS +pci_hca_enable( + IN PBUS_INTERFACE_STANDARD p_ifc, + IN PCI_COMMON_CONFIG* p_cfg + ) +{ + NTSTATUS status = STATUS_SUCCESS; + ULONG len; + + MLX4_ENTER( MLX4_DBG_PNP ); + + /* fix command register (set PCI Master bit) */ + // NOTE: we change here the saved value of the command register + if ( (p_cfg->Command & 7) != 7 ) { + p_cfg->Command |= 7; + len = p_ifc->SetBusData( p_ifc->Context, PCI_WHICHSPACE_CONFIG, + (PVOID)&p_cfg->Command , 4, sizeof(ULONG) ); + if( len != sizeof(ULONG) ) { + MLX4_PRINT( TRACE_LEVEL_ERROR ,MLX4_DBG_PNP ,("Failed to write command register.\n")); + status = STATUS_DEVICE_NOT_READY; + } + } + + MLX4_EXIT( MLX4_DBG_PNP ); + return status; +} + diff --git a/branches/ConnectX/hw/mlx4/kernel/bus/pdo.c b/branches/ConnectX/hw/mlx4/kernel/bus/pdo.c new file mode 100644 index 00000000..4fb02c5d --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/bus/pdo.c @@ -0,0 +1,240 @@ +#include "precomp.h" +#include +#include + +#if defined(EVENT_TRACING) +#include "pdo.tmh" +#endif + +#ifdef ALLOC_PRAGMA +#pragma alloc_text(PAGE, create_pdo) +#endif + +#define MAX_ID_LEN 80 + +NTSTATUS +create_pdo( + __in WDFDEVICE Device, + __in PWCHAR HardwareIds, + __in ULONG SerialNo +) +/*++ + +Routine Description: + + This routine creates and initialize a PDO. + +Arguments: + +Return Value: + + NT Status code. + +--*/ +{ + NTSTATUS status; + PWDFDEVICE_INIT pDeviceInit = NULL; + PPDO_DEVICE_DATA p_pdo = NULL; + PFDO_DEVICE_DATA p_fdo; + WDFDEVICE hChild = NULL; + WDF_OBJECT_ATTRIBUTES pdoAttributes; + WDF_DEVICE_PNP_CAPABILITIES pnpCaps; + WDF_DEVICE_POWER_CAPABILITIES powerCaps; + DECLARE_CONST_UNICODE_STRING(compatId, BUSENUM_COMPATIBLE_IDS); + DECLARE_CONST_UNICODE_STRING(deviceLocation, L"MLX4 Bus 0"); + UNICODE_STRING deviceId; + DECLARE_UNICODE_STRING_SIZE(buffer, MAX_ID_LEN); + + MLX4_PRINT(TRACE_LEVEL_INFORMATION, MLX4_DBG_DRV, ("Entered CreatePdo\n")); + + PAGED_CODE(); + + // + // Allocate a WDFDEVICE_INIT structure and set the properties + // so that we can create a device object for the child. 
+ // + pDeviceInit = WdfPdoInitAllocate(Device); + + if (pDeviceInit == NULL) { + status = STATUS_INSUFFICIENT_RESOURCES; + goto Cleanup; + } + + // + // Set DeviceType + // + WdfDeviceInitSetDeviceType(pDeviceInit, FILE_DEVICE_BUS_EXTENDER); + + // + // Provide DeviceID, HardwareIDs, CompatibleIDs and InstanceId + // + RtlInitUnicodeString(&deviceId,HardwareIds); + + status = WdfPdoInitAssignDeviceID(pDeviceInit, &deviceId); + if (!NT_SUCCESS(status)) { + goto Cleanup; + } + + // + // Note same string is used to initialize hardware id too + // + status = WdfPdoInitAddHardwareID(pDeviceInit, &deviceId); + if (!NT_SUCCESS(status)) { + goto Cleanup; + } + + status = WdfPdoInitAddCompatibleID(pDeviceInit, &compatId); + if (!NT_SUCCESS(status)) { + goto Cleanup; + } + + status = RtlUnicodeStringPrintf(&buffer, L"%02d", SerialNo); + if (!NT_SUCCESS(status)) { + goto Cleanup; + } + + status = WdfPdoInitAssignInstanceID(pDeviceInit, &buffer); + if (!NT_SUCCESS(status)) { + goto Cleanup; + } + + // + // Provide a description about the device. This text is usually read from + // the device. In the case of USB device, this text comes from the string + // descriptor. This text is displayed momentarily by the PnP manager while + // it's looking for a matching INF. If it finds one, it uses the Device + // Description from the INF file or the friendly name created by + // coinstallers to display in the device manager. FriendlyName takes + // precedence over the DeviceDesc from the INF file. + // + status = RtlUnicodeStringPrintf(&buffer,L"Mellanox ConnectX Virtual Infiniband Adapter (#%02d)", SerialNo ); + if (!NT_SUCCESS(status)) { + goto Cleanup; + } + + // + // You can call WdfPdoInitAddDeviceText multiple times, adding device + // text for multiple locales. When the system displays the text, it + // chooses the text that matches the current locale, if available. + // Otherwise it will use the string for the default locale. + // The driver can specify the driver's default locale by calling + // WdfPdoInitSetDefaultLocale. + // + status = WdfPdoInitAddDeviceText(pDeviceInit, + &buffer, &deviceLocation, 0x409); + if (!NT_SUCCESS(status)) { + goto Cleanup; + } + + WdfPdoInitSetDefaultLocale(pDeviceInit, 0x409); + + // + // Initialize the attributes to specify the size of PDO device extension. + // All the state information private to the PDO will be tracked here. + // + WDF_OBJECT_ATTRIBUTES_INIT_CONTEXT_TYPE(&pdoAttributes, PDO_DEVICE_DATA); + + status = WdfDeviceCreate(&pDeviceInit, &pdoAttributes, &hChild); + if (!NT_SUCCESS(status)) { + goto Cleanup; + } + + // + // Once the device is created successfully, framework frees the + // DeviceInit memory and sets the pDeviceInit to NULL. So don't + // call any WdfDeviceInit functions after that. + // + // Get the device context. + // + p_pdo = PdoGetData(hChild); + p_fdo = FdoGetData(Device); + + p_pdo->p_fdo = p_fdo; + p_pdo->SerialNo = SerialNo; + p_pdo->PdoDevice = hChild; + + // + // Set some properties for the child device. 
+ // + WDF_DEVICE_PNP_CAPABILITIES_INIT(&pnpCaps); + pnpCaps.Removable = WdfTrue; + pnpCaps.EjectSupported = WdfTrue; + pnpCaps.SurpriseRemovalOK = WdfTrue; + + pnpCaps.Address = SerialNo; + pnpCaps.UINumber = SerialNo; + + WdfDeviceSetPnpCapabilities(hChild, &pnpCaps); + + WDF_DEVICE_POWER_CAPABILITIES_INIT(&powerCaps); + + powerCaps.DeviceD1 = WdfTrue; + powerCaps.WakeFromD1 = WdfTrue; + powerCaps.DeviceWake = PowerDeviceD1; + + powerCaps.DeviceState[PowerSystemWorking] = PowerDeviceD0; + powerCaps.DeviceState[PowerSystemSleeping1] = PowerDeviceD1; + powerCaps.DeviceState[PowerSystemSleeping2] = PowerDeviceD3; + powerCaps.DeviceState[PowerSystemSleeping3] = PowerDeviceD3; + powerCaps.DeviceState[PowerSystemHibernate] = PowerDeviceD3; + powerCaps.DeviceState[PowerSystemShutdown] = PowerDeviceD3; + + WdfDeviceSetPowerCapabilities(hChild, &powerCaps); + + // + // Create a custom interface so that other drivers can + // query (IRP_MN_QUERY_INTERFACE) and use our callbacks directly. + // + p_fdo->bus_ib_ifc.Context = p_pdo; + + WDF_QUERY_INTERFACE_CONFIG_INIT( &p_pdo->qiMlx4Bus, + (PINTERFACE) &p_fdo->bus_ib_ifc, + &MLX4_BUS_IB_INTERFACE_GUID, NULL); + + status = WdfDeviceAddQueryInterface( hChild, &p_pdo->qiMlx4Bus ); + if (!NT_SUCCESS(status)) + goto Cleanup; + + // + // Expose also PCI.SYS interface for MLX4_HCA + // + WDF_QUERY_INTERFACE_CONFIG_INIT( &p_pdo->qiPciBus, + (PINTERFACE) &p_fdo->pci_dev.bus_pci_ifc, + &GUID_BUS_INTERFACE_STANDARD, NULL); + + status = WdfDeviceAddQueryInterface( hChild, &p_pdo->qiPciBus ); + if (!NT_SUCCESS(status)) + goto Cleanup; + + // + // Add this device to the FDO's collection of children. + // After the child device is added to the static collection successfully, + // driver must call WdfPdoMarkMissing to get the device deleted. It + // shouldn't delete the child device directly by calling WdfObjectDelete. + // + status = WdfFdoAddStaticChild(Device, hChild); + if (!NT_SUCCESS(status)) { + goto Cleanup; + } + + return status; + +Cleanup: + KdPrint(("BusEnum: Bus_CreatePdo failed %x\n", status)); + + // + // Call WdfDeviceInitFree if you encounter an error before the + // device is created. Once the device is created, framework + // NULLs the pDeviceInit value. 
+ // + if (pDeviceInit != NULL) { + WdfDeviceInitFree(pDeviceInit); + } + + if(hChild) { + WdfObjectDelete(hChild); + } + + return status; +} + diff --git a/branches/ConnectX/hw/mlx4/kernel/bus/precomp.h b/branches/ConnectX/hw/mlx4/kernel/bus/precomp.h new file mode 100644 index 00000000..bcfbcdb5 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/bus/precomp.h @@ -0,0 +1,18 @@ +#include +#include +#define NTSTRSAFE_LIB +#include +#include // required for GUID definitions +#include "public.h" +#include "l2w.h" +#include "ib\mlx4_ib.h" +#include "drv.h" +#if 0 +#include "mxe_hca.h" +#include "mtnic_if_defs.h" +#include "mxe_utils.h" +#include "mxe_wpptrace.h" +#include "mtnic_dev.h" +#include "mxe_drv.h" +#endif + diff --git a/branches/ConnectX/hw/mlx4/kernel/bus/sources b/branches/ConnectX/hw/mlx4/kernel/bus/sources new file mode 100644 index 00000000..18f116b1 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/bus/sources @@ -0,0 +1,54 @@ +TARGETNAME=mlx4_bus +TARGETPATH=..\..\..\..\bin\kernel\obj$(BUILD_ALT_DIR) +TARGETTYPE=DRIVER + +!if $(FREEBUILD) +#ENABLE_EVENT_TRACING=1 +!else +#ENABLE_EVENT_TRACING=1 +!endif + +SOURCES= \ + bus.rc \ + drv.c \ + pci.c \ + pdo.c \ + wmi.c \ + +PRECOMPILED_INCLUDE=precomp.h + +NTTARGETFILE0=mofcomp + +KMDF_VERSION=1 + +C_DEFINES=$(C_DEFINES) -DDRIVER -DDEPRECATE_DDK_FUNCTIONS -D__LITTLE_ENDIAN -DUSE_WDM_INTERRUPTS + +INCLUDES=..;..\inc;..\..\inc;..\..\..\..\inc;..\..\..\..\inc\kernel;..\core + + +TARGETLIBS= $(TARGETLIBS) \ + $(DDK_LIB_PATH)\ntstrsafe.lib \ + $(TARGETPATH)\*\complib.lib \ + $(TARGETPATH)\*\mlx4_core.lib \ + $(TARGETPATH)\*\mlx4_ib.lib \ + $(TARGETPATH)\*\mlx4_net.lib + + + +#LINKER_FLAGS=/MAP + + +!IFDEF ENABLE_EVENT_TRACING + +C_DEFINES = $(C_DEFINES) -DEVENT_TRACING + +RUN_WPP= $(SOURCES) -km -dll -ext: .c .cpp .h .C .CPP .H\ +# -preserveext:.cpp .h\ + -scan:mlx4_debug.h \ + -func:MLX4_PRINT(LEVEL,FLAGS,(MSG,...)) \ + -func:MLX4_PRINT_EXIT(LEVEL,FLAGS,(MSG,...)) +!ENDIF + +MSC_OPTIMIZATION=/Oi +MSC_WARNING_LEVEL= /W4 + diff --git a/branches/ConnectX/hw/mlx4/kernel/bus/wmi.c b/branches/ConnectX/hw/mlx4/kernel/bus/wmi.c new file mode 100644 index 00000000..e3faf50c --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/bus/wmi.c @@ -0,0 +1,244 @@ +/*++ + +Copyright (c) Microsoft Corporation. All rights reserved. + + THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY + KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR + PURPOSE. + +Module Name: + + WMI.C + +Abstract: + + This module handles all the WMI Irps. + +Environment: + + Kernel mode + +--*/ + +#include "precomp.h" + +#if defined(EVENT_TRACING) +#include "wmi.tmh" +#endif + +#ifdef ALLOC_PRAGMA +#pragma alloc_text(PAGE,WmiRegistration) +#pragma alloc_text(PAGE,EvtStdDataSetItem) +#pragma alloc_text(PAGE,EvtStdDataSetInstance) +#pragma alloc_text(PAGE,EvtStdDataQueryInstance) +#endif + +NTSTATUS +WmiRegistration( + WDFDEVICE Device + ) +/*++ +Routine Description + + Registers with WMI as a data provider for this + instance of the device + +--*/ +{ + WDF_WMI_PROVIDER_CONFIG providerConfig; + WDF_WMI_INSTANCE_CONFIG instanceConfig; + PFDO_DEVICE_DATA deviceData; + NTSTATUS status; + DECLARE_CONST_UNICODE_STRING(busRsrcName, BUSRESOURCENAME); + + PAGED_CODE(); + + deviceData = FdoGetData(Device); + + // + // Register WMI classes. + // First specify the resource name which contain the binary mof resource. 
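+    // (The resource name must match a MOFDATA resource linked into the
+    // driver binary. makefile.inc above compiles bus.mof into mlx4_bus.bmf
+    // with mofcomp; the .rc file is assumed to pull the .bmf in with a line
+    // such as "MofResourceName MOFDATA mlx4_bus.bmf" -- that wiring is not
+    // shown in this hunk.)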
+ // + status = WdfDeviceAssignMofResourceName(Device, &busRsrcName); + if (!NT_SUCCESS(status)) { + return status; + } + + WDF_WMI_PROVIDER_CONFIG_INIT(&providerConfig, &MLX4_BUS_WMI_STD_DATA_GUID); + providerConfig.MinInstanceBufferSize = sizeof(BUS_WMI_STD_DATA); + + // + // You would want to create a WDFWMIPROVIDER handle separately if you are + // going to dynamically create instances on the provider. Since we are + // statically creating one instance, there is no need to create the provider + // handle. + // + WDF_WMI_INSTANCE_CONFIG_INIT_PROVIDER_CONFIG(&instanceConfig, &providerConfig); + + // + // By setting Register to TRUE, we tell the framework to create a provider + // as part of the Instance creation call. This eliminates the need to + // call WdfWmiProviderRegister. + // + instanceConfig.Register = TRUE; + instanceConfig.EvtWmiInstanceQueryInstance = EvtStdDataQueryInstance; + instanceConfig.EvtWmiInstanceSetInstance = EvtStdDataSetInstance; + instanceConfig.EvtWmiInstanceSetItem = EvtStdDataSetItem; + + status = WdfWmiInstanceCreate( Device, + &instanceConfig, WDF_NO_OBJECT_ATTRIBUTES,WDF_NO_HANDLE ); + + return status; +} + +// +// WMI System Call back functions +// +NTSTATUS +EvtStdDataSetItem( + IN WDFWMIINSTANCE WmiInstance, + IN ULONG DataItemId, + IN ULONG InBufferSize, + IN PVOID InBuffer + ) +/*++ + +Routine Description: + + This routine is a callback into the driver to set for the contents of + an instance. + +Arguments: + + WmiInstance is the instance being set + + DataItemId has the id of the data item being set + + InBufferSize has the size of the data item passed + + InBuffer has the new values for the data item + +Return Value: + + status + +--*/ +{ + PFDO_DEVICE_DATA fdoData; + + PAGED_CODE(); + + fdoData = FdoGetData(WdfWmiInstanceGetDevice(WmiInstance)); + + switch(DataItemId) + { + case 1: + if (InBufferSize < sizeof(ULONG)) { + return STATUS_BUFFER_TOO_SMALL; + } + g_mlx4_dbg_level = fdoData->WmiData.DebugPrintLevel = *((PULONG)InBuffer); + return STATUS_SUCCESS; + + case 2: + if (InBufferSize < sizeof(ULONG)) { + return STATUS_BUFFER_TOO_SMALL; + } + g_mlx4_dbg_flags = fdoData->WmiData.DebugPrintFlags = *((PULONG)InBuffer); + return STATUS_SUCCESS; + + default: + return STATUS_WMI_READ_ONLY; + } +} + +NTSTATUS +EvtStdDataSetInstance( + IN WDFWMIINSTANCE WmiInstance, + IN ULONG InBufferSize, + IN PVOID InBuffer + ) +/*++ + +Routine Description: + + This routine is a callback into the driver to set for the contents of + an instance. + +Arguments: + + WmiInstance is the instance being set + + BufferSize has the size of the data block passed + + Buffer has the new values for the data block + +Return Value: + + status + +--*/ +{ + PFDO_DEVICE_DATA fdoData; + + UNREFERENCED_PARAMETER(InBufferSize); + + PAGED_CODE(); + + fdoData = FdoGetData(WdfWmiInstanceGetDevice(WmiInstance)); + + // + // We will update only writable elements. 
+ // + g_mlx4_dbg_level = fdoData->WmiData.DebugPrintLevel = ((PBUS_WMI_STD_DATA)InBuffer)->DebugPrintLevel; + g_mlx4_dbg_flags = fdoData->WmiData.DebugPrintFlags = ((PBUS_WMI_STD_DATA)InBuffer)->DebugPrintFlags; + + return STATUS_SUCCESS; +} + +NTSTATUS +EvtStdDataQueryInstance( + IN WDFWMIINSTANCE WmiInstance, + IN ULONG OutBufferSize, + IN PVOID OutBuffer, + OUT PULONG BufferUsed + ) +/*++ + +Routine Description: + + This routine is a callback into the driver to set for the contents of + a wmi instance + +Arguments: + + WmiInstance is the instance being set + + OutBufferSize on has the maximum size available to write the data + block. + + OutBuffer on return is filled with the returned data block + + BufferUsed pointer containing how many bytes are required (upon failure) or + how many bytes were used (upon success) + +Return Value: + + status + +--*/ +{ + PFDO_DEVICE_DATA fdoData; + + UNREFERENCED_PARAMETER(OutBufferSize); + + PAGED_CODE(); + + fdoData = FdoGetData(WdfWmiInstanceGetDevice(WmiInstance)); + + *BufferUsed = sizeof (BUS_WMI_STD_DATA); + * (PBUS_WMI_STD_DATA) OutBuffer = fdoData->WmiData; + + return STATUS_SUCCESS; +} + diff --git a/branches/ConnectX/hw/mlx4/kernel/bus/wpptrace.h b/branches/ConnectX/hw/mlx4/kernel/bus/wpptrace.h new file mode 100644 index 00000000..81ab3352 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/bus/wpptrace.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2005 Mellanox Technologies LTD. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * This source code may incorporate intellectual property owned by + * Microsoft Corporation. Our provision of this source code does not + * include any licenses or any other rights to you under any Microsoft + * intellectual property. If you would like a license from Microsoft + * (e.g., to rebrand, redistribute), you need to contact Microsoft + * directly. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +// Author: Uri Habusha + +#pragma once + +#if defined(EVENT_TRACING) + +#define WPP_CONTROL_GUIDS \ + WPP_DEFINE_CONTROL_GUID(EthrnetGuid,(d7221994, d451, 4272, af18, 55df9ca9bfa7), \ + WPP_DEFINE_BIT(BUS_DRIVER) \ + WPP_DEFINE_BIT(BUS_SS) \ + WPP_DEFINE_BIT(BUS_PNP) \ + WPP_DEFINE_BIT(BUS_IOCTL) \ + WPP_DEFINE_BIT(BUS_POWER) \ + WPP_DEFINE_BIT(BUS_WMI)) + + +#define WPP_LEVEL_FLAGS_ENABLED(lvl, flags) (WPP_LEVEL_ENABLED(flags) && WPP_CONTROL(WPP_BIT_ ## flags).Level >= lvl) +#define WPP_LEVEL_FLAGS_LOGGER(lvl,flags) WPP_LEVEL_LOGGER(flags) +#define WPP_FLAG_ENABLED(flags)(WPP_LEVEL_ENABLED(flags) && WPP_CONTROL(WPP_BIT_ ## flags).Level >= TRACE_LEVEL_VERBOSE) +#define WPP_FLAG_LOGGER(flags) WPP_LEVEL_LOGGER(flags) + +// begin_wpp config +// TRACE_FUNC_ENTER(FLAG); +// TRACE_FUNC_EXIT(FLAG); +// TRACE_PRINT(LEVEL,FLAGS,MSG,...) +// USESUFFIX(TRACE_FUNC_ENTER, "====>>> %!FUNC! "); +// USESUFFIX(TRACE_FUNC_EXIT, "<<<====== %!FUNC!]"); +// end_wpp + +#else //defined(EVENT_TRACING) + +#include + +// Debug toppics +#define BUS_DRIVER 0x000001 +#define BUS_SS 0x000002 +#define BUS_PNP 0x000004 +#define BUS_IOCTL 0x000008 +#define BUS_POWER 0x000010 +#define BUS_WMI 0x000020 + +#if DBG + +extern const unsigned int g_SdpDbgLevel; +extern const unsigned int g_SdpDbgFlags; + +// +//BUGBUG: need to protect against context switch otherwise there can +// be mismatched of trace messages. We can't use a simple spinlock +// since some of the printing occours in IRQ level and the spinlock +// can be alreardy use. +// +#define TRACE_PRINT(_level_,_flag_,_msg_) \ + if (g_SdpDbgLevel >= (_level_) && (g_SdpDbgFlags & (_flag_))) \ + { \ + if(_level_ == TRACE_LEVEL_ERROR) \ + DbgPrint ("***ERROR*** "); \ + DbgPrint ("%s(): ",__FUNCTION__); \ + DbgPrint _msg_; \ + } + +#else + +#define TRACE_PRINT(lvl ,flags, msg) + +#endif + + + +#define TRACE_FUNC_ENTER(flags)\ + ETH_PRINT(TRACE_LEVEL_VERBOSE, flags,("===>\n")); + +#define TRACE_FUNC_EXIT(flags)\ + ETH_PRINT(TRACE_LEVEL_VERBOSE, flags, ("<===\n" )); + + + +#endif //defined(EVENT_TRACING) + diff --git a/branches/ConnectX/hw/mlx4/kernel/core/SOURCES b/branches/ConnectX/hw/mlx4/kernel/core/SOURCES new file mode 100644 index 00000000..91882f95 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/core/SOURCES @@ -0,0 +1,52 @@ +TARGETNAME=mlx4_core +TARGETPATH=..\..\..\..\bin\kernel\obj$(BUILD_ALT_DIR) +TARGETTYPE=DRIVER_LIBRARY + + + +!if $(FREEBUILD) +#ENABLE_EVENT_TRACING=1 +!else +#ENABLE_EVENT_TRACING=1 +!endif + + +DLLDEF=core.def +PASS0_HEADERDIR=. + +SOURCES= \ + ev_log.mc \ + core.rc \ + cache.c \ + device.c \ + iobuf.c \ + l2w.c \ + l2w_radix.c \ + l2w_debug.c \ + l2w_memory.c \ + l2w_umem.c \ + pa_cash.c \ + packer.c \ + ud_header.c \ + verbs.c \ + +INCLUDES=..;..\inc;..\net;..\..\..\..\inc;..\..\..\..\inc\kernel; + +C_DEFINES=$(C_DEFINES) -DDRIVER -DDEPRECATE_DDK_FUNCTIONS -D__LITTLE_ENDIAN -DUSE_WDM_INTERRUPTS + +TARGETLIBS= \ + $(DDK_LIB_PATH)\ntstrsafe.lib \ + $(TARGETPATH)\*\complib.lib + + +!IFDEF ENABLE_EVENT_TRACING + +C_DEFINES = $(C_DEFINES) -DEVENT_TRACING + +RUN_WPP = $(SOURCES) -km -ext: .c .h .C .H \ + -scan:..\mlx4_debug.h \ + -func:MLX4_PRINT(LEVEL,FLAGS,(MSG,...)) \ + -func:MLX4_PRINT_EXIT(LEVEL,FLAGS,(MSG,...)) +!ENDIF + +MSC_WARNING_LEVEL= /W4 diff --git a/branches/ConnectX/hw/mlx4/kernel/core/cache.c b/branches/ConnectX/hw/mlx4/kernel/core/cache.c new file mode 100644 index 00000000..3d8112bb --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/core/cache.c @@ -0,0 +1,426 @@ +/* + * Copyright (c) 2004 Topspin Communications. 
All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: cache.c 1349 2004-12-16 21:09:43Z roland $ + */ + +#include "ib\mlx4_ib.h" +#include "ib_cache.h" + +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "cache.tmh" +#endif + +#pragma warning( disable : 4200) +struct ib_pkey_cache { + int table_len; + u16 table[0]; +}; + +struct ib_gid_cache { + int table_len; + union ib_gid table[0]; +}; +#pragma warning( default : 4200) + +struct ib_update_work { + PIO_WORKITEM work_item; + struct ib_device *device; + u8 port_num; +}; + +static inline int start_port(struct ib_device *device) +{ + return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1; +} + +static inline int end_port(struct ib_device *device) +{ + return (device->node_type == RDMA_NODE_IB_SWITCH) ? 
+ 0 : device->phys_port_cnt; +} + +int ib_get_cached_gid(struct ib_device *device, + u8 port_num, + int index, + union ib_gid *gid) +{ + struct ib_gid_cache *cache; + unsigned long flags; + int ret = 0; + + if (port_num < start_port(device) || port_num > end_port(device)) + return -EINVAL; + + read_lock_irqsave(&device->cache.lock, &flags); + + cache = device->cache.gid_cache[port_num - start_port(device)]; + + if (index < 0 || index >= cache->table_len) + ret = -EINVAL; + else + *gid = cache->table[index]; + + read_unlock_irqrestore(&device->cache.lock, flags); + + return ret; +} +EXPORT_SYMBOL(ib_get_cached_gid); + +int ib_find_cached_gid(struct ib_device *device, + union ib_gid *gid, + u8 *port_num, + u16 *index) +{ + struct ib_gid_cache *cache; + unsigned long flags; + int p, i; + int ret = -ENOENT; + + *port_num = (u8)-1; + if (index) + *index = (u16)-1; + + read_lock_irqsave(&device->cache.lock, &flags); + + for (p = 0; p <= end_port(device) - start_port(device); ++p) { + cache = device->cache.gid_cache[p]; + for (i = 0; i < cache->table_len; ++i) { + if (!memcmp(gid, &cache->table[i], sizeof *gid)) { + *port_num = (u8)(p + start_port(device)); + if (index) + *index = (u16)i; + ret = 0; + goto found; + } + } + } +found: + read_unlock_irqrestore(&device->cache.lock, flags); + + return ret; +} +EXPORT_SYMBOL(ib_find_cached_gid); + +int ib_get_cached_pkey(struct ib_device *device, + u8 port_num, + int index, + u16 *pkey) +{ + struct ib_pkey_cache *cache; + unsigned long flags; + int ret = 0; + + if (port_num < start_port(device) || port_num > end_port(device)) + return -EINVAL; + + read_lock_irqsave(&device->cache.lock, &flags); + + cache = device->cache.pkey_cache[port_num - start_port(device)]; + + if (index < 0 || index >= cache->table_len) + ret = -EINVAL; + else + *pkey = cache->table[index]; + + read_unlock_irqrestore(&device->cache.lock, flags); + + return ret; +} +EXPORT_SYMBOL(ib_get_cached_pkey); + +int ib_find_cached_pkey(struct ib_device *device, + u8 port_num, + u16 pkey, + u16 *index) +{ + struct ib_pkey_cache *cache; + unsigned long flags; + int i; + int ret = -ENOENT; + + if (port_num < start_port(device) || port_num > end_port(device)) + return -EINVAL; + + read_lock_irqsave(&device->cache.lock, &flags); + + cache = device->cache.pkey_cache[port_num - start_port(device)]; + + *index = (u16)-1; + + for (i = 0; i < cache->table_len; ++i) + if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) { + *index = (u16)i; + ret = 0; + break; + } + + read_unlock_irqrestore(&device->cache.lock, flags); + + return ret; +} +EXPORT_SYMBOL(ib_find_cached_pkey); + +int ib_get_cached_lmc(struct ib_device *device, + u8 port_num, + u8 *lmc) +{ + unsigned long flags; + int ret = 0; + + if (port_num < start_port(device) || port_num > end_port(device)) + return -EINVAL; + + read_lock_irqsave(&device->cache.lock, &flags); + *lmc = device->cache.lmc_cache[port_num - start_port(device)]; + read_unlock_irqrestore(&device->cache.lock, flags); + + return ret; +} +EXPORT_SYMBOL(ib_get_cached_lmc); + +static void ib_cache_update(struct ib_device *device, + u8 port) +{ + struct ib_port_attr *tprops = NULL; + struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache; + struct ib_gid_cache *gid_cache = NULL, *old_gid_cache; + int i; + int ret; + + tprops = kmalloc(sizeof *tprops, GFP_KERNEL); + if (!tprops) + return; + + ret = ib_query_port(device, port, tprops); + if (ret) { + printk(KERN_WARNING "ib_query_port failed (%d) for %s\n", + ret, device->name); + goto err; + } + + pkey_cache = kmalloc(sizeof 
*pkey_cache + tprops->pkey_tbl_len * + sizeof *pkey_cache->table, GFP_KERNEL); + if (!pkey_cache) + goto err; + + pkey_cache->table_len = tprops->pkey_tbl_len; + + gid_cache = kmalloc(sizeof *gid_cache + tprops->gid_tbl_len * + sizeof *gid_cache->table, GFP_KERNEL); + if (!gid_cache) + goto err; + + gid_cache->table_len = tprops->gid_tbl_len; + + for (i = 0; i < pkey_cache->table_len; ++i) { + ret = ib_query_pkey(device, port, (u16)i, pkey_cache->table + i); + if (ret) { + printk(KERN_WARNING "ib_query_pkey failed (%d) for %s (index %d)\n", + ret, device->name, i); + goto err; + } + } + + for (i = 0; i < gid_cache->table_len; ++i) { + ret = ib_query_gid(device, port, i, gid_cache->table + i); + if (ret) { + printk(KERN_WARNING "ib_query_gid failed (%d) for %s (index %d)\n", + ret, device->name, i); + goto err; + } + } + + write_lock_irq(&device->cache.lock); + + old_pkey_cache = device->cache.pkey_cache[port - start_port(device)]; + old_gid_cache = device->cache.gid_cache [port - start_port(device)]; + + device->cache.pkey_cache[port - start_port(device)] = pkey_cache; + device->cache.gid_cache [port - start_port(device)] = gid_cache; + + device->cache.lmc_cache[port - start_port(device)] = tprops->lmc; + + write_unlock_irq(&device->cache.lock); + + kfree(old_pkey_cache); + kfree(old_gid_cache); + kfree(tprops); + return; + +err: + kfree(pkey_cache); + kfree(gid_cache); + kfree(tprops); +} + +static void ib_cache_task(void *work_ptr) +{ + struct ib_update_work *work = work_ptr; + + ib_cache_update(work->device, work->port_num); +} + +static void ib_work_item ( + IN PDEVICE_OBJECT DeviceObject, + IN PVOID Context + ) +{ + struct ib_update_work *work = (struct ib_update_work *)Context; + UNREFERENCED_PARAMETER(DeviceObject); + ib_cache_task(Context); + IoFreeWorkItem(work->work_item); + kfree(Context); +} + +static void ib_cache_event(struct ib_event_handler *handler, + struct ib_event *event) +{ + struct ib_update_work *work; + + if (event->event == IB_EVENT_PORT_ERR || + event->event == IB_EVENT_PORT_ACTIVE || + event->event == IB_EVENT_LID_CHANGE || + event->event == IB_EVENT_PKEY_CHANGE || + event->event == IB_EVENT_SM_CHANGE || + event->event == IB_EVENT_CLIENT_REREGISTER) { + work = kmalloc(sizeof *work, GFP_ATOMIC); + if (work) { + work->device = event->device; + work->port_num = event->element.port_num; + + { // schedule a work item to work + // get PDO + PDEVICE_OBJECT pdo = to_mdev(handler->device)->dev->pdev->p_self_do; + + // allocate work item + work->work_item = IoAllocateWorkItem(pdo); + if (work->work_item == NULL) { + //TODO: at least - print error. 
Need to return code, but the function is void + } + else { // schedule the work + IoQueueWorkItem( + work->work_item, + ib_work_item, + DelayedWorkQueue, + work + ); + } + } + } + } +} + +static void ib_cache_setup_one(struct ib_device *device) +{ + int p; + + rwlock_init(&device->cache.lock); + + device->cache.pkey_cache = + kmalloc(sizeof *device->cache.pkey_cache * + (end_port(device) - start_port(device) + 1), GFP_KERNEL); + device->cache.gid_cache = + kmalloc(sizeof *device->cache.gid_cache * + (end_port(device) - start_port(device) + 1), GFP_KERNEL); + + device->cache.lmc_cache = kmalloc(sizeof *device->cache.lmc_cache * + (end_port(device) - + start_port(device) + 1), + GFP_KERNEL); + + if (!device->cache.pkey_cache || !device->cache.gid_cache || + !device->cache.lmc_cache) { + printk(KERN_WARNING "Couldn't allocate cache " + "for %s\n", device->name); + goto err; + } + + for (p = 0; p <= end_port(device) - start_port(device); ++p) { + device->cache.pkey_cache[p] = NULL; + device->cache.gid_cache [p] = NULL; + ib_cache_update(device, (u8)(p + start_port(device))); + } + + INIT_IB_EVENT_HANDLER(&device->cache.event_handler, + device, ib_cache_event); + if (ib_register_event_handler(&device->cache.event_handler)) + goto err_cache; + + return; + +err_cache: + for (p = 0; p <= end_port(device) - start_port(device); ++p) { + kfree(device->cache.pkey_cache[p]); + kfree(device->cache.gid_cache[p]); + } + +err: + kfree(device->cache.pkey_cache); + kfree(device->cache.gid_cache); + kfree(device->cache.lmc_cache); +} + +static void ib_cache_cleanup_one(struct ib_device *device) +{ + int p; + + ib_unregister_event_handler(&device->cache.event_handler); + // TODO: how to do that ? + // LINUX: flush_scheduled_work(); + + for (p = 0; p <= end_port(device) - start_port(device); ++p) { + kfree(device->cache.pkey_cache[p]); + kfree(device->cache.gid_cache[p]); + } + + kfree(device->cache.pkey_cache); + kfree(device->cache.gid_cache); + kfree(device->cache.lmc_cache); +} + +static struct ib_client cache_client = { "cache", ib_cache_setup_one, ib_cache_cleanup_one }; + +int __init ib_cache_setup(void) +{ + return ib_register_client(&cache_client); +} + +void __exit ib_cache_cleanup(void) +{ + ib_unregister_client(&cache_client); +} diff --git a/branches/ConnectX/hw/mlx4/kernel/core/core.def b/branches/ConnectX/hw/mlx4/kernel/core/core.def new file mode 100644 index 00000000..bfbb2f18 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/core/core.def @@ -0,0 +1,64 @@ +LIBRARY mlx4_core.lib + +EXPORTS +; DllInitialize and DllUnload must be exported for the OS reference counting to +; work, and must be private for the compiler to accept them. 
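For an export driver library like mlx4_core.lib these two routines typically have the following shape; this is a generic sketch only -- the real definitions live elsewhere in mlx4_core and are not shown in this hunk:

    NTSTATUS
    DllInitialize( IN PUNICODE_STRING RegistryPath )
    {
        UNREFERENCED_PARAMETER(RegistryPath);
        return STATUS_SUCCESS;
    }

    NTSTATUS
    DllUnload( VOID )
    {
        return STATUS_SUCCESS;
    }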
+DllInitialize private +DllUnload private + +; l2w.c +pci_pool_create +strlcpy +__bitmap_full +__bitmap_empty +core_init +core_cleanup + +; radix.c +radix_tree_create +radix_tree_insert +radix_tree_lookup +radix_tree_delete +radix_tree_destroy + +; cache.c +ib_get_cached_gid +ib_find_cached_gid +ib_get_cached_pkey +ib_find_cached_pkey +ib_get_cached_lmc + +; packer +ib_pack +ib_unpack + +; ud_header +ib_ud_header_init +ib_ud_header_pack +ib_ud_header_unpack + +; device.c +ib_alloc_device +ib_dealloc_device +ib_register_device +ib_unregister_device +ib_register_client +ib_unregister_client +ib_get_client_data +ib_set_client_data +ib_register_event_handler +ib_unregister_event_handler +ib_dispatch_event +ib_query_device +ib_query_port +ib_query_gid +ib_query_pkey +ib_modify_device +ib_modify_port +ib_find_gid +ib_find_pkey + +; verbs.c +ib_modify_qp_is_ok +ib_create_ah +ib_destroy_ah \ No newline at end of file diff --git a/branches/ConnectX/hw/mlx4/kernel/core/core.h b/branches/ConnectX/hw/mlx4/kernel/core/core.h new file mode 100644 index 00000000..ba5787b3 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/core/core.h @@ -0,0 +1,12 @@ +#pragma once + +int __init ib_cache_setup(void); + +void __exit ib_cache_cleanup(void); + +int __init ib_core_init(void); + +void __exit ib_core_cleanup(void); + +void init_qp_state_tbl(); + diff --git a/branches/ConnectX/hw/mlx4/kernel/core/core.rc b/branches/ConnectX/hw/mlx4/kernel/core/core.rc new file mode 100644 index 00000000..1d136d44 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/core/core.rc @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ibal.rc 1611 2006-08-20 14:48:55Z sleybo $ + */ + + +#include + +#define VER_FILETYPE VFT_DRV +#define VER_FILESUBTYPE VFT2_UNKNOWN + +#ifdef _DEBUG_ +#define VER_FILEDESCRIPTION_STR "MLX4 Upper Layer (Debug)" +#else +#define VER_FILEDESCRIPTION_STR "MLX4 Upper Layer" +#endif + +#define VER_INTERNALNAME_STR "mlx4_core.lib" +#define VER_ORIGINALFILENAME_STR "mlx4_core.lib" + +#include + diff --git a/branches/ConnectX/hw/mlx4/kernel/core/device.c b/branches/ConnectX/hw/mlx4/kernel/core/device.c new file mode 100644 index 00000000..63a92499 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/core/device.c @@ -0,0 +1,723 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: device.c 1349 2004-12-16 21:09:43Z roland $ + */ + +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "device.tmh" +#endif + +#include "l2w.h" +#include "ib_verbs.h" +#include "core.h" + +struct ib_client_data { + struct list_head list; + struct ib_client *client; + void * data; +}; + +static LIST_HEAD(device_list); +static LIST_HEAD(client_list); + +/* + * device_mutex protects access to both device_list and client_list. + * There's no real point to using multiple locks or something fancier + * like an rwsem: we always access both lists, and we're always + * modifying one list or the other list. In any case this is not a + * hot path so there's no point in trying to optimize. 
+ */ +static DEFINE_MUTEX(device_mutex); + +static int ib_device_check_mandatory(struct ib_device *device) +{ +#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device, x), #x } + static const struct { + size_t offset; + char *name; + } mandatory_table[] = { + IB_MANDATORY_FUNC(query_device), + IB_MANDATORY_FUNC(query_port), + IB_MANDATORY_FUNC(query_pkey), + IB_MANDATORY_FUNC(query_gid), + IB_MANDATORY_FUNC(alloc_pd), + IB_MANDATORY_FUNC(dealloc_pd), + IB_MANDATORY_FUNC(create_ah), + IB_MANDATORY_FUNC(destroy_ah), + IB_MANDATORY_FUNC(create_qp), + IB_MANDATORY_FUNC(modify_qp), + IB_MANDATORY_FUNC(destroy_qp), + IB_MANDATORY_FUNC(post_send), + IB_MANDATORY_FUNC(post_recv), + IB_MANDATORY_FUNC(create_cq), + IB_MANDATORY_FUNC(destroy_cq), + IB_MANDATORY_FUNC(poll_cq), + IB_MANDATORY_FUNC(req_notify_cq), + IB_MANDATORY_FUNC(get_dma_mr), + IB_MANDATORY_FUNC(dereg_mr) + }; + int i; + + for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) { + if (!*(void **) ((u8 *) device + mandatory_table[i].offset)) { + printk(KERN_WARNING "Device %s is missing mandatory function %s\n", + device->name, mandatory_table[i].name); + return -EINVAL; + } + } + + return 0; +} + +static struct ib_device *__ib_device_get_by_name(const char *name) +{ + struct ib_device *device; + + list_for_each_entry(device, &device_list, core_list, struct ib_device) + if (!strncmp(name, device->name, IB_DEVICE_NAME_MAX)) + return device; + + return NULL; +} + + +static int alloc_name(char *name) +{ + unsigned long *inuse; + char buf[IB_DEVICE_NAME_MAX]; + struct ib_device *device; + int i; + + inuse = (unsigned long *) get_zeroed_page(GFP_KERNEL); + if (!inuse) + return -ENOMEM; + + list_for_each_entry(device, &device_list, core_list, struct ib_device) { + if (!sscanf(device->name, name, &i)) + continue; + if (i < 0 || i >= PAGE_SIZE * 8) + continue; + if (RtlStringCbPrintfA(buf, sizeof buf, name, i)) + return -EINVAL; + if (!strncmp(buf, device->name, IB_DEVICE_NAME_MAX)) + set_bit(i, inuse); + } + + i = find_first_zero_bit(inuse, PAGE_SIZE * 8); + free_page(inuse); + if (RtlStringCbPrintfA(buf, sizeof buf, name, i)) + return -EINVAL; + + if (__ib_device_get_by_name(buf)) + return -ENFILE; + + strlcpy(name, buf, IB_DEVICE_NAME_MAX); + return 0; +} + +static int start_port(struct ib_device *device) +{ + return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1; +} + + +static int end_port(struct ib_device *device) +{ + return (device->node_type == RDMA_NODE_IB_SWITCH) ? + 0 : device->phys_port_cnt; +} + +/** + * ib_alloc_device - allocate an IB device struct + * @size:size of structure to allocate + * + * Low-level drivers should use ib_alloc_device() to allocate &struct + * ib_device. @size is the size of the structure to be allocated, + * including any private data used by the low-level driver. + * ib_dealloc_device() must be used to free structures allocated with + * ib_alloc_device(). + */ +struct ib_device *ib_alloc_device(size_t size) +{ + BUG_ON(size < sizeof (struct ib_device)); + + return kzalloc(size, GFP_KERNEL); +} +EXPORT_SYMBOL(ib_alloc_device); + +/** + * ib_dealloc_device - free an IB device struct + * @device:structure to free + * + * Free a structure allocated with ib_alloc_device(). 
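+ *
+ * A hedged sketch of the usual allocation pattern (the structure and field
+ * names here are illustrative only, not part of this patch): a low-level
+ * driver embeds struct ib_device at the start of its own device structure
+ * and sizes the allocation accordingly.
+ *
+ *	struct my_hca {
+ *		struct ib_device	ib_dev;		// kept first by convention
+ *		int			my_private_state;
+ *	};
+ *
+ *	struct my_hca *hca =
+ *		(struct my_hca *) ib_alloc_device(sizeof(struct my_hca));
+ *	if (!hca)
+ *		return -ENOMEM;
+ *	...
+ *	ib_dealloc_device(&hca->ib_dev);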
+ */ +void ib_dealloc_device(struct ib_device *device) +{ + if (device->reg_state == IB_DEV_UNINITIALIZED) { + kfree(device); + return; + } + + BUG_ON(device->reg_state != IB_DEV_UNREGISTERED); + +} +EXPORT_SYMBOL(ib_dealloc_device); + +static int add_client_context(struct ib_device *device, struct ib_client *client) +{ + struct ib_client_data *context; + unsigned long flags; + + context = kmalloc(sizeof *context, GFP_KERNEL); + if (!context) { + printk(KERN_WARNING "Couldn't allocate client context for %s/%s\n", + device->name, client->name); + return -ENOMEM; + } + + context->client = client; + context->data = NULL; + + spin_lock_irqsave(&device->client_data_lock, &flags); + list_add(&context->list, &device->client_data_list); + spin_unlock_irqrestore(&device->client_data_lock, flags); + + return 0; +} + +static int read_port_table_lengths(struct ib_device *device) +{ + struct ib_port_attr *tprops = NULL; + int num_ports, ret = -ENOMEM; + u8 port_index; + + tprops = kmalloc(sizeof *tprops, GFP_KERNEL); + if (!tprops) + goto out; + + num_ports = end_port(device) - start_port(device) + 1; + + device->pkey_tbl_len = kmalloc(sizeof *device->pkey_tbl_len * num_ports, + GFP_KERNEL); + device->gid_tbl_len = kmalloc(sizeof *device->gid_tbl_len * num_ports, + GFP_KERNEL); + if (!device->pkey_tbl_len || !device->gid_tbl_len) + goto err; + + for (port_index = 0; port_index < num_ports; ++port_index) { + ret = ib_query_port(device, (u8)(port_index + start_port(device)), + tprops); + if (ret) + goto err; + device->pkey_tbl_len[port_index] = tprops->pkey_tbl_len; + device->gid_tbl_len[port_index] = tprops->gid_tbl_len; + } + + ret = 0; + goto out; + +err: + kfree(device->gid_tbl_len); + kfree(device->pkey_tbl_len); +out: + kfree(tprops); + return ret; +} + +/** + * ib_register_device - Register an IB device with IB core + * @device:Device to register + * + * Low-level drivers use ib_register_device() to register their + * devices with the IB core. All registered clients will receive a + * callback for each device that is added. @device must be allocated + * with ib_alloc_device(). + */ +int ib_register_device(struct ib_device *device) +{ + int ret; + + mutex_lock(&device_mutex); + + if (strchr(device->name, '%')) { + ret = alloc_name(device->name); + if (ret) + goto out; + } + + if (ib_device_check_mandatory(device)) { + ret = -EINVAL; + goto out; + } + + INIT_LIST_HEAD(&device->event_handler_list); + INIT_LIST_HEAD(&device->client_data_list); + spin_lock_init(&device->event_handler_lock); + spin_lock_init(&device->client_data_lock); + + ret = read_port_table_lengths(device); + if (ret) { + printk(KERN_WARNING "Couldn't create table lengths cache for device %s\n", + device->name); + goto out; + } + + list_add_tail(&device->core_list, &device_list); + + device->reg_state = IB_DEV_REGISTERED; + + { + struct ib_client *client; + + list_for_each_entry(client, &client_list, list, struct ib_client) + if (client->add && !add_client_context(device, client)) + client->add(device); + } + + out: + mutex_unlock(&device_mutex); + return ret; +} +EXPORT_SYMBOL(ib_register_device); + +/** + * ib_unregister_device - Unregister an IB device + * @device:Device to unregister + * + * Unregister an IB device. All clients will receive a remove callback. 
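+ *
+ * Note that the device structure itself is not freed here: the registration
+ * state is moved to IB_DEV_UNREGISTERED, and the caller is expected to
+ * release the structure afterwards with ib_dealloc_device().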
+ */ +void ib_unregister_device(struct ib_device *device) +{ + struct ib_client *client; + struct ib_client_data *context, *tmp; + unsigned long flags; + + mutex_lock(&device_mutex); + + list_for_each_entry_reverse(client, &client_list, list, struct ib_client) + if (client->remove) + client->remove(device); + + list_del(&device->core_list); + + kfree(device->gid_tbl_len); + kfree(device->pkey_tbl_len); + + mutex_unlock(&device_mutex); + + spin_lock_irqsave(&device->client_data_lock, &flags); + list_for_each_entry_safe(context, tmp, &device->client_data_list, list, struct ib_client_data, struct ib_client_data) + kfree(context); + spin_unlock_irqrestore(&device->client_data_lock, flags); + + device->reg_state = IB_DEV_UNREGISTERED; +} +EXPORT_SYMBOL(ib_unregister_device); + +/** + * ib_register_client - Register an IB client + * @client:Client to register + * + * Upper level users of the IB drivers can use ib_register_client() to + * register callbacks for IB device addition and removal. When an IB + * device is added, each registered client's add method will be called + * (in the order the clients were registered), and when a device is + * removed, each client's remove method will be called (in the reverse + * order that clients were registered). In addition, when + * ib_register_client() is called, the client will receive an add + * callback for all devices already registered. + */ +int ib_register_client(struct ib_client *client) +{ + struct ib_device *device; + + mutex_lock(&device_mutex); + + list_add_tail(&client->list, &client_list); + list_for_each_entry(device, &device_list, core_list, struct ib_device) + if (client->add && !add_client_context(device, client)) + client->add(device); + + mutex_unlock(&device_mutex); + + return 0; +} +EXPORT_SYMBOL(ib_register_client); + +/** + * ib_unregister_client - Unregister an IB client + * @client:Client to unregister + * + * Upper level users use ib_unregister_client() to remove their client + * registration. When ib_unregister_client() is called, the client + * will receive a remove callback for each IB device still registered. + */ +void ib_unregister_client(struct ib_client *client) +{ + struct ib_client_data *context, *tmp; + struct ib_device *device; + unsigned long flags; + + mutex_lock(&device_mutex); + + list_for_each_entry(device, &device_list, core_list, struct ib_device) { + if (client->remove) + client->remove(device); + + spin_lock_irqsave(&device->client_data_lock, &flags); + list_for_each_entry_safe(context, tmp, &device->client_data_list, list, struct ib_client_data, struct ib_client_data) + if (context->client == client) { + list_del(&context->list); + kfree(context); + } + spin_unlock_irqrestore(&device->client_data_lock, flags); + } + list_del(&client->list); + + mutex_unlock(&device_mutex); +} +EXPORT_SYMBOL(ib_unregister_client); + +/** + * ib_get_client_data - Get IB client context + * @device:Device to get context for + * @client:Client to get context for + * + * ib_get_client_data() returns client context set with + * ib_set_client_data(). 
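+ *
+ * A minimal, illustrative sketch of the client pattern (the my_* names are
+ * hypothetical, not part of this patch): the add callback allocates
+ * per-device state and publishes it with ib_set_client_data(), and later
+ * calls fetch it back with ib_get_client_data().
+ *
+ *	static struct ib_client my_client;	// registered with ib_register_client()
+ *
+ *	static void my_add_one(struct ib_device *device)
+ *	{
+ *		struct my_ctx *ctx = kzalloc(sizeof *ctx, GFP_KERNEL);
+ *		if (!ctx)
+ *			return;
+ *		ib_set_client_data(device, &my_client, ctx);
+ *	}
+ *
+ *	// elsewhere: struct my_ctx *ctx = ib_get_client_data(device, &my_client);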
+ */ +void *ib_get_client_data(struct ib_device *device, struct ib_client *client) +{ + struct ib_client_data *context; + void *ret = NULL; + unsigned long flags; + + spin_lock_irqsave(&device->client_data_lock, &flags); + list_for_each_entry(context, &device->client_data_list, list, struct ib_client_data) + if (context->client == client) { + ret = context->data; + break; + } + spin_unlock_irqrestore(&device->client_data_lock, flags); + + return ret; +} +EXPORT_SYMBOL(ib_get_client_data); + +/** + * ib_set_client_data - Set IB client context + * @device:Device to set context for + * @client:Client to set context for + * @data:Context to set + * + * ib_set_client_data() sets client context that can be retrieved with + * ib_get_client_data(). + */ +void ib_set_client_data(struct ib_device *device, struct ib_client *client, + void *data) +{ + struct ib_client_data *context; + unsigned long flags; + + spin_lock_irqsave(&device->client_data_lock, &flags); + list_for_each_entry(context, &device->client_data_list, list, struct ib_client_data) + if (context->client == client) { + context->data = data; + goto out; + } + + printk(KERN_WARNING "No client context found for %s/%s\n", + device->name, client->name); + +out: + spin_unlock_irqrestore(&device->client_data_lock, flags); +} +EXPORT_SYMBOL(ib_set_client_data); + +/** + * ib_register_event_handler - Register an IB event handler + * @event_handler:Handler to register + * + * ib_register_event_handler() registers an event handler that will be + * called back when asynchronous IB events occur (as defined in + * chapter 11 of the InfiniBand Architecture Specification). This + * callback may occur in interrupt context. + */ +int ib_register_event_handler (struct ib_event_handler *event_handler) +{ + unsigned long flags; + + spin_lock_irqsave(&event_handler->device->event_handler_lock, &flags); + list_add_tail(&event_handler->list, + &event_handler->device->event_handler_list); + spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); + + return 0; +} +EXPORT_SYMBOL(ib_register_event_handler); + +/** + * ib_unregister_event_handler - Unregister an event handler + * @event_handler:Handler to unregister + * + * Unregister an event handler registered with + * ib_register_event_handler(). + */ +int ib_unregister_event_handler(struct ib_event_handler *event_handler) +{ + unsigned long flags; + + spin_lock_irqsave(&event_handler->device->event_handler_lock, &flags); + list_del(&event_handler->list); + spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); + + return 0; +} +EXPORT_SYMBOL(ib_unregister_event_handler); + +/** + * ib_dispatch_event - Dispatch an asynchronous event + * @event:Event to dispatch + * + * Low-level drivers must call ib_dispatch_event() to dispatch the + * event to all registered event handlers when an asynchronous event + * occurs. + */ +void ib_dispatch_event(struct ib_event *event) +{ + unsigned long flags; + struct ib_event_handler *handler; + + spin_lock_irqsave(&event->device->event_handler_lock, &flags); + + list_for_each_entry(handler, &event->device->event_handler_list, list, struct ib_event_handler) + handler->handler(handler, event); + + spin_unlock_irqrestore(&event->device->event_handler_lock, flags); +} +EXPORT_SYMBOL(ib_dispatch_event); + +/** + * ib_query_device - Query IB device attributes + * @device:Device to query + * @device_attr:Device attributes + * + * ib_query_device() returns the attributes of a device through the + * @device_attr pointer. 
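+ *
+ * The attributes are typically queried once at initialization and cached by
+ * the consumer; an illustrative fragment (field names follow the
+ * Linux-derived ib_verbs.h included in this patch):
+ *
+ *	struct ib_device_attr attr;
+ *	if (!ib_query_device(device, &attr))
+ *		max_qp = attr.max_qp;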
+ */ +int ib_query_device(struct ib_device *device, + struct ib_device_attr *device_attr) +{ + return device->query_device(device, device_attr); +} +EXPORT_SYMBOL(ib_query_device); + +/** + * ib_query_port - Query IB port attributes + * @device:Device to query + * @port_num:Port number to query + * @port_attr:Port attributes + * + * ib_query_port() returns the attributes of a port through the + * @port_attr pointer. + */ +int ib_query_port(struct ib_device *device, + u8 port_num, + struct ib_port_attr *port_attr) +{ + if (port_num < start_port(device) || port_num > end_port(device)) + return -EINVAL; + + return device->query_port(device, port_num, port_attr); +} +EXPORT_SYMBOL(ib_query_port); + +/** + * ib_query_gid - Get GID table entry + * @device:Device to query + * @port_num:Port number to query + * @index:GID table index to query + * @gid:Returned GID + * + * ib_query_gid() fetches the specified GID table entry. + */ +int ib_query_gid(struct ib_device *device, + u8 port_num, int index, union ib_gid *gid) +{ + return device->query_gid(device, port_num, index, gid); +} +EXPORT_SYMBOL(ib_query_gid); + +/** + * ib_query_pkey - Get P_Key table entry + * @device:Device to query + * @port_num:Port number to query + * @index:P_Key table index to query + * @pkey:Returned P_Key + * + * ib_query_pkey() fetches the specified P_Key table entry. + */ +int ib_query_pkey(struct ib_device *device, + u8 port_num, u16 index, u16 *pkey) +{ + return device->query_pkey(device, port_num, index, pkey); +} +EXPORT_SYMBOL(ib_query_pkey); + +/** + * ib_modify_device - Change IB device attributes + * @device:Device to modify + * @device_modify_mask:Mask of attributes to change + * @device_modify:New attribute values + * + * ib_modify_device() changes a device's attributes as specified by + * the @device_modify_mask and @device_modify structure. + */ +int ib_modify_device(struct ib_device *device, + int device_modify_mask, + struct ib_device_modify *device_modify) +{ + return device->modify_device(device, device_modify_mask, + device_modify); +} +EXPORT_SYMBOL(ib_modify_device); + +/** + * ib_modify_port - Modifies the attributes for the specified port. + * @device: The device to modify. + * @port_num: The number of the port to modify. + * @port_modify_mask: Mask used to specify which attributes of the port + * to change. + * @port_modify: New attribute values for the port. + * + * ib_modify_port() changes a port's attributes as specified by the + * @port_modify_mask and @port_modify structure. + */ +int ib_modify_port(struct ib_device *device, + u8 port_num, int port_modify_mask, + struct ib_port_modify *port_modify) +{ + if (port_num < start_port(device) || port_num > end_port(device)) + return -EINVAL; + + return device->modify_port(device, port_num, port_modify_mask, + port_modify); +} +EXPORT_SYMBOL(ib_modify_port); + +/** + * ib_find_gid - Returns the port number and GID table index where + * a specified GID value occurs. + * @device: The device to query. + * @gid: The GID value to search for. + * @port_num: The port number of the device where the GID value was found. + * @index: The index into the GID table where the GID was found. This + * parameter may be NULL. 
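+ *
+ * An illustrative call (gid already holds the value to look up):
+ *
+ *	u8 port;
+ *	u16 index;
+ *	if (ib_find_gid(device, &gid, &port, &index) == 0) {
+ *		// gid was found at entry 'index' of the GID table on port 'port'
+ *	}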
+ */ +int ib_find_gid(struct ib_device *device, union ib_gid *gid, + u8 *port_num, u16 *index) +{ + union ib_gid tmp_gid; + int ret, port, i; + + for (port = start_port(device); port <= end_port(device); ++port) { + for (i = 0; i < device->gid_tbl_len[port - start_port(device)]; ++i) { + ret = ib_query_gid(device, (u8)port, i, &tmp_gid); + if (ret) + return ret; + if (!memcmp(&tmp_gid, gid, sizeof *gid)) { + *port_num = (u8)port; + if (index) + *index = (u16)i; + return 0; + } + } + } + + return -ENOENT; +} +EXPORT_SYMBOL(ib_find_gid); + +/** + * ib_find_pkey - Returns the PKey table index where a specified + * PKey value occurs. + * @device: The device to query. + * @port_num: The port number of the device to search for the PKey. + * @pkey: The PKey value to search for. + * @index: The index into the PKey table where the PKey was found. + */ +int ib_find_pkey(struct ib_device *device, + u8 port_num, u16 pkey, u16 *index) +{ + int ret, i; + u16 tmp_pkey; + + for (i = 0; i < device->pkey_tbl_len[port_num - start_port(device)]; ++i) { + ret = ib_query_pkey(device, port_num, (u16)i, &tmp_pkey); + if (ret) + return ret; + + if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) { + *index = (u16)i; + return 0; + } + } + + return -ENOENT; +} +EXPORT_SYMBOL(ib_find_pkey); + +int __init ib_core_init(void) +{ + int ret; + + mutex_init(&device_mutex); + ret = ib_cache_setup(); + if (ret) { + printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n"); + } + + return ret; +} + +void __exit ib_core_cleanup(void) +{ + ib_cache_cleanup(); + /* Make sure that any pending umem accounting work is done. */ + // TODO: how to do that ? + // LINUX: flush_scheduled_work(); +} + diff --git a/branches/ConnectX/hw/mlx4/kernel/core/ev_log.mc b/branches/ConnectX/hw/mlx4/kernel/core/ev_log.mc new file mode 100644 index 00000000..7eb7f3a7 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/core/ev_log.mc @@ -0,0 +1,56 @@ +;/*++ +;============================================================================= +;Copyright (c) 2007 Mellanox Technologies +; +;Module Name: +; +; ev_log.mc +; +;Abstract: +; +; MLX4 Driver event log messages +; +;Authors: +; +; Leonid Keller +; +;Environment: +; +; Kernel Mode . +; +;============================================================================= +;--*/ +; +MessageIdTypedef = NTSTATUS + +SeverityNames = ( + Success = 0x0:STATUS_SEVERITY_SUCCESS + Informational = 0x1:STATUS_SEVERITY_INFORMATIONAL + Warning = 0x2:STATUS_SEVERITY_WARNING + Error = 0x3:STATUS_SEVERITY_ERROR + ) + +FacilityNames = ( + System = 0x0 + RpcRuntime = 0x2:FACILITY_RPC_RUNTIME + RpcStubs = 0x3:FACILITY_RPC_STUBS + Io = 0x4:FACILITY_IO_ERROR_CODE + MLX4 = 0x8:FACILITY_MLX4_ERROR_CODE + ) + + +MessageId=0x0001 Facility=MLX4 Severity=Informational SymbolicName=EVENT_MLX4_ANY_INFO +Language=English +%2 +. + +MessageId=0x0002 Facility=MLX4 Severity=Warning SymbolicName=EVENT_MLX4_ANY_WARN +Language=English +%2 +. + +MessageId=0x0003 Facility=MLX4 Severity=Error SymbolicName=EVENT_MLX4_ANY_ERROR +Language=English +%2 +. 
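+;
+; The message IDs above are referenced from the driver through the header
+; generated from this file (ev_log.h, included by l2w_debug.c). As a rough
+; illustration of the intended use, mlx4_err() further down in this patch
+; reports an error event like this, wbuf being the formatted wide-character
+; text that the event viewer substitutes for %2:
+;
+;   WriteEventLogEntryStr( mdev->pdev->p_self_do, (ULONG)EVENT_MLX4_ANY_ERROR,
+;                          0, 0, wbuf, 0, 0 );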
+ diff --git a/branches/ConnectX/hw/mlx4/kernel/core/ev_log.rc b/branches/ConnectX/hw/mlx4/kernel/core/ev_log.rc new file mode 100644 index 00000000..a928b138 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/core/ev_log.rc @@ -0,0 +1,2 @@ +LANGUAGE 0x9,0x1 +1 11 "MSG00001.bin" diff --git a/branches/ConnectX/hw/mlx4/kernel/core/iobuf.c b/branches/ConnectX/hw/mlx4/kernel/core/iobuf.c new file mode 100644 index 00000000..bc39763d --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/core/iobuf.c @@ -0,0 +1,553 @@ +/* + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: mt_memory.c 2020 2007-05-01 09:29:10Z leonid $ + */ +#include "l2w.h" +#include "pa_cash.h" +#include "ib_verbs.h" + +#if defined (EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "iobuf.tmh" +#endif + + + + +/* +* Function: map user buffer to kernel and lock it +* +* Return: +*/ +int get_user_pages( + IN struct mlx4_dev *dev, /* device */ + IN u64 start, /* address in user space */ + IN int npages, /* size in pages */ + IN int write_access, /* access rights */ + OUT struct scatterlist *sg /* s/g list */ + ) +{ + PMDL mdl_p; + int size = npages << PAGE_SHIFT; + int access = (write_access) ? 
IoWriteAccess : IoReadAccess; + int err; + void * kva; /* kernel virtual address */ + + UNREFERENCED_PARAMETER(dev); + + MLX4_ENTER(MLX4_DBG_MEMORY); + ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL); + + /* allocate MDL */ + mdl_p = IoAllocateMdl( (PVOID)(ULONG_PTR)start, (ULONG)size, + FALSE, + FALSE, /* not charge quota */ + NULL); + if (mdl_p == NULL) { + err = -ENOMEM; + goto err0; + } + + /* lock memory */ + __try { + MmProbeAndLockPages( mdl_p, UserMode, access ); + } + __except (EXCEPTION_EXECUTE_HANDLER) + { + NTSTATUS Status = GetExceptionCode(); + MLX4_PRINT(TRACE_LEVEL_ERROR ,MLX4_DBG_MEMORY ,("Exception 0x%x on MmProbeAndLockPages(), addr 0x%I64x, size %d\n", Status, start, size)); + switch(Status){ + case STATUS_WORKING_SET_QUOTA: + err = -ENOMEM;break; + case STATUS_ACCESS_VIOLATION: + err = -EACCES;break; + default : + err = -EINVAL; + } + + goto err1; + } + + /* map it to kernel */ + kva = MmMapLockedPagesSpecifyCache( mdl_p, + KernelMode, MmNonCached, + NULL, FALSE, NormalPagePriority ); + if (kva == NULL) { + MLX4_PRINT(TRACE_LEVEL_ERROR ,MLX4_DBG_MEMORY ,("MmMapLockedPagesSpecifyCache failed\n")); + err = -EFAULT; + goto err2; + } + + sg->dma_addr.va = kva; + sg->dma_addr.sz = size; + sg->offset = (unsigned int)(start & ~PAGE_MASK); + sg->p_mdl = mdl_p; + // TODO: has to be dma address, not physical one + sg->dma_addr.da = MmGetPhysicalAddress(kva).QuadPart; + return 0; + +err2: + MmUnlockPages(mdl_p); +err1: + IoFreeMdl(mdl_p); +err0: + MLX4_EXIT(MLX4_DBG_MEMORY); + return err; + + } + +void put_page(struct scatterlist *sg) +{ + if (sg->p_mdl) { + MmUnmapLockedPages( sg->dma_addr.va, sg->p_mdl ); + MmUnlockPages(sg->p_mdl); + IoFreeMdl(sg->p_mdl); + } +} + + +typedef struct _iobuf_seg { + LIST_ENTRY link; + PMDL mdl_p; + u64 va; /* virtual address of the buffer */ + u64 size; /* size in bytes of the buffer */ + u32 nr_pages; + int is_user; +} iobuf_seg_t; + +// Returns: 0 on success, -ENOMEM or -EACCESS on error +static int register_segment( + IN u64 va, + IN u64 size, + IN int is_user, + IN ib_access_t acc, + OUT iobuf_seg_t **iobuf_seg) +{ + PMDL mdl_p; + int rc; + KPROCESSOR_MODE mode; + iobuf_seg_t * new_iobuf; + static ULONG cnt=0; + LOCK_OPERATION Operation; + + // set Operation + if (acc & IB_AC_LOCAL_WRITE) + Operation = IoModifyAccess; + else + Operation = IoReadAccess; + + // allocate IOBUF segment object + new_iobuf = (iobuf_seg_t *)kmalloc(sizeof(iobuf_seg_t), GFP_KERNEL ); + if (new_iobuf == NULL) { + rc = -ENOMEM; + goto err_nomem; + } + + // allocate MDL + mdl_p = IoAllocateMdl( (PVOID)(ULONG_PTR)va, (ULONG)size, FALSE,FALSE,NULL); + if (mdl_p == NULL) { + rc = -ENOMEM; + goto err_alloc_mdl; + } + + // make context-dependent things + if (is_user) { + ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL); + mode = UserMode; + } + else { /* Mapping to kernel virtual address */ + // MmBuildMdlForNonPagedPool(mdl_p); // fill MDL ??? - should we do that really ? 
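+		// A note on the kernel path: the MDL is still populated by
+		// MmProbeAndLockPages() below, just probed in KernelMode; whether
+		// MmBuildMdlForNonPagedPool() would be preferable for non-paged
+		// kernel buffers is left as the open question above.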
+ mode = KernelMode; + } + + __try { /* try */ + MmProbeAndLockPages( mdl_p, mode, Operation ); /* lock memory */ + } /* try */ + + __except (EXCEPTION_EXECUTE_HANDLER) { + MLX4_PRINT(TRACE_LEVEL_ERROR, MLX4_DBG_MEMORY, + ("MOSAL_iobuf_register: Exception 0x%x on MmProbeAndLockPages(), va %I64d, sz %I64d\n", + GetExceptionCode(), va, size)); + rc = -EACCES; + goto err_probe; + } + + // fill IOBUF object + new_iobuf->va = va; + new_iobuf->size= size; + new_iobuf->nr_pages = ADDRESS_AND_SIZE_TO_SPAN_PAGES( va, size ); + new_iobuf->mdl_p = mdl_p; + new_iobuf->is_user = is_user; + *iobuf_seg = new_iobuf; + return 0; + +err_probe: + IoFreeMdl(mdl_p); +err_alloc_mdl: + ExFreePool((PVOID)new_iobuf); +err_nomem: + return rc; +} + +void iobuf_init( + IN u64 va, + IN u64 size, + IN int is_user, + IN OUT iobuf_t *iobuf_p) +{ + iobuf_p->va = va; + iobuf_p->size= size; + iobuf_p->is_user = is_user; + InitializeListHead( &iobuf_p->seg_que ); + iobuf_p->seg_num = 0; + iobuf_p->nr_pages = 0; + iobuf_p->is_cashed = 0; +} + +int iobuf_register( + IN u64 va, + IN u64 size, + IN int is_user, + IN enum ib_access_flags acc, + IN OUT iobuf_t *iobuf_p) +{ + int rc=0; + u64 seg_va; // current segment start + u64 seg_size; // current segment size + u64 rdc; // remain data counter - what is rest to lock + u64 delta; // he size of the last not full page of the first segment + iobuf_seg_t * new_iobuf; + unsigned page_size = PAGE_SIZE; + +// 32 - for any case +#define PFNS_IN_PAGE_SIZE_MDL ((PAGE_SIZE - sizeof(struct _MDL) - 32) / sizeof(long)) +#define MIN_IOBUF_SEGMENT_SIZE (PAGE_SIZE * PFNS_IN_PAGE_SIZE_MDL) // 4MB + + ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL); + + // we'll try to register all at once. + seg_va = va; + seg_size = rdc = size; + + // allocate segments + while (rdc > 0) { + // map a segment + rc = register_segment(seg_va, seg_size, is_user, acc, &new_iobuf ); + + // success - move to another segment + if (!rc) { + rdc -= seg_size; + seg_va += seg_size; + InsertTailList( &iobuf_p->seg_que, &new_iobuf->link ); + iobuf_p->seg_num++; + // round the segment size to the next page boundary + delta = (seg_va + seg_size) & (page_size - 1); + if (delta) { + seg_size -= delta; + seg_size += page_size; + } + if (seg_size > rdc) + seg_size = rdc; + continue; + } + + // failure - too large a buffer: lessen it and try once more + if (rc == -ENOMEM) { + // no where to lessen - too low memory + if (seg_size <= MIN_IOBUF_SEGMENT_SIZE) + break; + // lessen the size + seg_size >>= 1; + // round the segment size to the next page boundary + delta = (seg_va + seg_size) & (page_size - 1); + if (delta) { + seg_size -= delta; + seg_size += page_size; + } + if (seg_size > rdc) + seg_size = rdc; + continue; + } + + // got unrecoverable error + break; + } + + // SUCCESS + if (rc) + iobuf_deregister( iobuf_p ); + else + iobuf_p->nr_pages += ADDRESS_AND_SIZE_TO_SPAN_PAGES( va, size ); + + return rc; +} + + +static void __iobuf_copy( + IN OUT iobuf_t *dst_iobuf_p, + IN iobuf_t *src_iobuf_p + ) +{ + int i; + iobuf_seg_t *iobuf_seg_p; + + *dst_iobuf_p = *src_iobuf_p; + InitializeListHead( &dst_iobuf_p->seg_que ); + for (i=0; iseg_num; ++i) { + iobuf_seg_p = (iobuf_seg_t *)(PVOID)RemoveHeadList( &src_iobuf_p->seg_que ); + InsertTailList( &dst_iobuf_p->seg_que, &iobuf_seg_p->link ); + } +} + +/* if the buffer to be registered overlaps a buffer, already registered, + a race can happen between HCA, writing to the previously registered + buffer and the probing functions (MmProbeAndLockPages, MmSecureVirtualMemory), + used in the 
algorithm of memory registration. + To prevent the race we maintain reference counters for the physical pages, being registered, + and register every physical page FOR THE WRITE ACCESS only once.*/ + +int iobuf_register_with_cash( + IN u64 vaddr, + IN u64 size, + IN int is_user, + IN OUT enum ib_access_flags *acc_p, + IN OUT iobuf_t *iobuf_p) +{ + int rc, pa_in; + iobuf_t sec_iobuf; + int i, page_in , page_out, page_in_total; + int nr_pages; + char *subregion_start, *va; + u64 subregion_size; + u64 rdc; // remain data counter - what is rest to lock + u64 delta; // he size of the last not full page of the first segment + enum ib_access_flags acc; + + mutex_lock(&g_pa_mutex); + + // register memory for read access to bring pages into the memory + rc = iobuf_register( vaddr, size, is_user, 0, iobuf_p); + + // on error or read access - exit + if (rc || !(*acc_p & IB_ACCESS_LOCAL_WRITE)) + goto exit; + + // re-register buffer with the correct access rights + iobuf_init( (u64)vaddr, size, is_user, &sec_iobuf ); + nr_pages = ADDRESS_AND_SIZE_TO_SPAN_PAGES( vaddr, size ); + subregion_start = va = (char*)(ULONG_PTR)vaddr; + rdc = size; + pa_in = page_in = page_in_total = page_out = 0; + + for (i=0; i rdc) + subregion_size = rdc; + + // register the subregion + rc = iobuf_register( (u64)subregion_start, subregion_size, is_user, acc, &sec_iobuf); + if (rc) + goto cleanup; + + // prepare to the next loop + rdc -= subregion_size; + subregion_start +=subregion_size; + } + } + + // prepare to registration of the subregion + if (pa_in) { // SUBREGION WITH READ ACCESS + acc = 0; + subregion_size = (u64)page_in * PAGE_SIZE; + } + else { // SUBREGION WITH WRITE ACCESS + acc = IB_ACCESS_LOCAL_WRITE; + subregion_size = (u64)page_out * PAGE_SIZE; + } + + // round the subregion size to the page boundary + delta = (u64)(subregion_start + subregion_size) & (PAGE_SIZE - 1); + subregion_size -= delta; + if (subregion_size > rdc) + subregion_size = rdc; + + // register the subregion + rc = iobuf_register( (u64)subregion_start, subregion_size, is_user, acc, &sec_iobuf); + if (rc) + goto cleanup; + + // cash phys pages + rc = pa_register(iobuf_p); + if (rc) + goto err_pa_reg; + + // replace the iobuf + iobuf_deregister( iobuf_p ); + sec_iobuf.is_cashed = TRUE; + __iobuf_copy( iobuf_p, &sec_iobuf ); + + // buffer is a part of also registered buffer - change the rights + if (page_in_total) + *acc_p &= ~IB_ACCESS_LOCAL_WRITE; + + goto exit; + +err_pa_reg: + iobuf_deregister( &sec_iobuf ); +cleanup: + iobuf_deregister( iobuf_p ); +exit: + mutex_unlock(&g_pa_mutex); + return rc; +} + +static void deregister_segment(iobuf_seg_t * iobuf_seg_p) +{ + MmUnlockPages( iobuf_seg_p->mdl_p ); // unlock the buffer + IoFreeMdl( iobuf_seg_p->mdl_p ); // free MDL + ExFreePool(iobuf_seg_p); +} + +void iobuf_deregister(iobuf_t *iobuf_p) +{ + iobuf_seg_t *iobuf_seg_p; // pointer to current segment object + + ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL); + + // release segments + while (!IsListEmpty( &iobuf_p->seg_que )) { + iobuf_seg_p = (iobuf_seg_t *)(PVOID)RemoveTailList( &iobuf_p->seg_que ); + deregister_segment(iobuf_seg_p); + iobuf_p->seg_num--; + } + ASSERT(iobuf_p->seg_num == 0); +} + +void iobuf_deregister_with_cash(iobuf_t *iobuf_p) +{ + ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL); + + mutex_lock(&g_pa_mutex); + if (iobuf_p->is_cashed) + pa_deregister(iobuf_p); + iobuf_deregister(iobuf_p); + mutex_unlock(&g_pa_mutex); +} + +void iobuf_iter_init( + IN iobuf_t *iobuf_p, + IN OUT iobuf_iter_t *iterator_p) +{ + iterator_p->seg_p = 
iobuf_p->seg_que.Flink; + iterator_p->pfn_ix = 0; +} + +// the function returns phys addresses of the pages, also for the first page +// if one wants to get the phys address of the buffer, one has to +// add the offset from the start of the page to the first phys address +// Returns: the number of entries, filled in page_tbl_p +// Returns 0 while at the end of list. +uint32_t iobuf_get_tpt_seg( + IN iobuf_t *iobuf_p, + IN OUT iobuf_iter_t *iterator_p, + IN uint32_t n_pages_in, + IN OUT uint64_t *page_tbl_p ) +{ + uint32_t i=0; // has to be initialized here for a premature exit + iobuf_seg_t *seg_p; // pointer to current segment object + PPFN_NUMBER pfn_p; + uint32_t pfn_ix; // index of PFN in PFN array of the current segment + uint64_t *pa_buf_p = page_tbl_p; + + // prepare to the loop + seg_p = iterator_p->seg_p; // first segment of the first iobuf + pfn_ix= iterator_p->pfn_ix; + + // check, whether we at the end of the list + if ((PVOID)seg_p == (PVOID)&iobuf_p->seg_que) + goto exit; + pfn_p = MmGetMdlPfnArray( seg_p->mdl_p ) + pfn_ix; + + // pass along all the PFN arrays + for (; i < n_pages_in; i++, pa_buf_p++) { + // convert PFN to the physical address + *pa_buf_p = (uint64_t)*pfn_p++ << PAGE_SHIFT; + + // get to the next PFN + if (++pfn_ix >= seg_p->nr_pages) { + seg_p = (iobuf_seg_t*)seg_p->link.Flink; + pfn_ix = 0; + if ((PVOID)seg_p == (PVOID)&iobuf_p->seg_que) { + i++; + break; + } + pfn_p = MmGetMdlPfnArray( seg_p->mdl_p ); + } + } + +exit: + iterator_p->seg_p = seg_p; + iterator_p->pfn_ix = pfn_ix; + return i; +} + + diff --git a/branches/ConnectX/hw/mlx4/kernel/core/l2w.c b/branches/ConnectX/hw/mlx4/kernel/core/l2w.c new file mode 100644 index 00000000..f794495d --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/core/l2w.c @@ -0,0 +1,326 @@ +#include "l2w.h" +#include "core.h" +#include "pa_cash.h" +#include "mlx4.h" + +/* Nth element of the table contains the index of the first set bit of N; 8 - for N=0 */ +char g_set_bit_tbl[256]; + +/* Nth element of the table contains the index of the first 0 bit of N; 8 - for N=255 */ +char g_clr_bit_tbl[256]; + +/* interval for a cmd go-bit waiting */ +// TODO: not clear what is to be this value: +// 1. it has to be enough great, so as the tread will go waiting; +// 2. it has to be enough small, so as there is no too large waiting after first command try; +// 3. it has to be enough great, so as not to cause to intensive rescheduling; +#define CMD_WAIT_USECS 2 +#define CMD_WAIT_INTERVAL ((-10) * CMD_WAIT_USECS) +LARGE_INTEGER g_cmd_interval = { (ULONG)CMD_WAIT_INTERVAL, 0 }; + +//////////////////////////////////////////////////////// +// +// PCI POOL +// +//////////////////////////////////////////////////////// + +pci_pool_t * +pci_pool_create (const char *name, struct pci_dev *pdev, + size_t size, size_t align, size_t allocation) +{ + pci_pool_t *pool; + UNREFERENCED_PARAMETER(align); + UNREFERENCED_PARAMETER(allocation); + + ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL); + + // allocation parameter is not handled yet + ASSERT(allocation == 0); + + //TODO: not absolutely correct: Linux's pci_pool_alloc provides contiguous physical memory, + // while default alloc function - ExAllocatePoolWithTag -doesn't. 
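+	// (An allocation of at most PAGE_SIZE bytes from non-paged pool does not
+	// cross a page boundary, so it is physically contiguous in practice -
+	// which is what the PAGE_SIZE sanity check below relies on.)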
+ // But for now it is used for elements of size <= PAGE_SIZE + // Anyway - a sanity check: + ASSERT(size <= PAGE_SIZE); + if (size > PAGE_SIZE) + return NULL; + + // allocate object + pool = (pci_pool_t *)ExAllocatePoolWithTag( NonPagedPool, sizeof(pci_pool_t), MT_TAG_PCIPOOL ); + if (pool == NULL) + return NULL; + + //TODO: not too effective: one can read its own alloc/free functions + ExInitializeNPagedLookasideList( &pool->pool_hdr, NULL, NULL, 0, size, MT_TAG_PCIPOOL, 0 ); + + // fill the object + pool->mdev = pdev->dev; + pool->size = size; + strncpy( pool->name, name, sizeof pool->name ); + + return pool; +} + + +//////////////////////////////////////////////////////// +// +// BIT TECHNIQUES +// +//////////////////////////////////////////////////////// + +void fill_bit_tbls() +{ + unsigned long i; + for (i=0; i<256; ++i) { + g_set_bit_tbl[i] = (char)(_ffs_raw(&i,0) - 1); + g_clr_bit_tbl[i] = (char)(_ffz_raw(&i,0) - 1); + } + g_set_bit_tbl[0] = g_clr_bit_tbl[255] = 8; +} + + +//////////////////////////////////////////////////////// +// +// BIT MAPS +// +//////////////////////////////////////////////////////// + +int __bitmap_full(const unsigned long *bitmap, int bits) +{ + int k, lim = bits/BITS_PER_LONG; + for (k = 0; k < lim; ++k) + if (~bitmap[k]) + return 0; + + if (bits % BITS_PER_LONG) + if (~bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) + return 0; + + return 1; +} + +int __bitmap_empty(const unsigned long *bitmap, int bits) +{ + int k, lim = bits/BITS_PER_LONG; + for (k = 0; k < lim; ++k) + if (bitmap[k]) + return 0; + + if (bits % BITS_PER_LONG) + if (bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) + return 0; + + return 1; +} + + +//////////////////////////////////////////////////////// +// +// DEBUG PRINT +// +//////////////////////////////////////////////////////// + +VOID +WriteEventLogEntry( + PVOID pi_pIoObject, + ULONG pi_ErrorCode, + ULONG pi_UniqueErrorCode, + ULONG pi_FinalStatus, + ULONG pi_nDataItems, + ... + ) +/*++ + +Routine Description: + Writes an event log entry to the event log. + +Arguments: + + pi_pIoObject......... The IO object ( driver object or device object ). + pi_ErrorCode......... The error code. + pi_UniqueErrorCode... A specific error code. + pi_FinalStatus....... The final status. + pi_nDataItems........ Number of data items. + . + . data items values + . + +Return Value: + + None . 
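+Notes:
+
+    This is the plain variant of the event-log helper: it writes only the
+    ULONG dump-data items. WriteEventLogEntryStr() in l2w_debug.c plays the
+    same role but additionally appends a wide-character insertion string for
+    the %2 placeholder of the ev_log.mc messages.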
+ +--*/ +{ /* WriteEventLogEntry */ + + /* Variable argument list */ + va_list l_Argptr; + /* Pointer to an error log entry */ + PIO_ERROR_LOG_PACKET l_pErrorLogEntry; + + /* Init the variable argument list */ + va_start(l_Argptr, pi_nDataItems); + + /* Allocate an error log entry */ + l_pErrorLogEntry = + (PIO_ERROR_LOG_PACKET)IoAllocateErrorLogEntry( + pi_pIoObject, + (UCHAR)(sizeof(IO_ERROR_LOG_PACKET)+pi_nDataItems*sizeof(ULONG)) + ); + /* Check allocation */ + if ( l_pErrorLogEntry != NULL) + { /* OK */ + + /* Data item index */ + USHORT l_nDataItem ; + + /* Set the error log entry header */ + l_pErrorLogEntry->ErrorCode = pi_ErrorCode; + l_pErrorLogEntry->DumpDataSize = (USHORT) (pi_nDataItems*sizeof(ULONG)); + l_pErrorLogEntry->SequenceNumber = 0; + l_pErrorLogEntry->MajorFunctionCode = 0; + l_pErrorLogEntry->IoControlCode = 0; + l_pErrorLogEntry->RetryCount = 0; + l_pErrorLogEntry->UniqueErrorValue = pi_UniqueErrorCode; + l_pErrorLogEntry->FinalStatus = pi_FinalStatus; + + /* Insert the data items */ + for (l_nDataItem = 0; l_nDataItem < pi_nDataItems; l_nDataItem++) + { /* Inset a data item */ + + /* Current data item */ + int l_CurDataItem ; + + /* Get next data item */ + l_CurDataItem = va_arg( l_Argptr, int); + + /* Put it into the data array */ + l_pErrorLogEntry->DumpData[l_nDataItem] = l_CurDataItem ; + + } /* Inset a data item */ + + /* Write the packet */ + IoWriteErrorLogEntry(l_pErrorLogEntry); + + } /* OK */ + + /* Term the variable argument list */ + va_end(l_Argptr); + +} /* WriteEventLogEntry */ + + +//////////////////////////////////////////////////////// +// +// GENERAL +// +//////////////////////////////////////////////////////// + +// from lib/string.c +/** +* strlcpy - Copy a %NUL terminated string into a sized buffer +* @dest: Where to copy the string to +* @src: Where to copy the string from +* @size: size of destination buffer +* +* Compatible with *BSD: the result is always a valid +* NUL-terminated string that fits in the buffer (unless, +* of course, the buffer size is zero). It does not pad +* out the result like strncpy() does. +*/ +SIZE_T strlcpy(char *dest, const void *src, SIZE_T size) +{ + SIZE_T ret = strlen(src); + + if (size) { + SIZE_T len = (ret >= size) ? 
size-1 : ret; + memcpy(dest, src, len); + dest[len] = '\0'; + } + return ret; +} + +int core_init() +{ + int err; + + fill_bit_tbls(); + init_qp_state_tbl(); + err = ib_core_init(); + if (err) + return err; + return pa_cash_init(); +} + +void core_cleanup() +{ + ib_core_cleanup(); + pa_cash_release(); +} + +#ifdef USE_WDM_INTERRUPTS + +// TODO: put into Globals +uint32_t g_processor_affinity = 0; + +int request_irq( + IN struct mlx4_dev * dev, + IN ULONG vector, /* interrupt or MSI-X vector */ + IN PKSERVICE_ROUTINE isr, /* ISR */ + IN PVOID isr_ctx, /* ISR context */ + IN dpc_t dpc, + IN PVOID dpc_ctx, /* ISR context */ + OUT PKINTERRUPT * int_obj /* interrupt object */ + ) +{ + int i; + NTSTATUS status; + struct mlx4_priv *priv = mlx4_priv(dev); + struct pci_dev *pdev = dev->pdev; /* interrupt resources */ + +#ifdef CONFIG_PCI_MSI + // TODO: for MSI interrupts one needs to use new API +#if (NTDDI_VERSION >= NTDDI_LONGHORN) + NTKERNELAPI + NTSTATUS + IoConnectInterruptEx ( + __inout PIO_CONNECT_INTERRUPT_PARAMETERS Parameters + ); +#endif // NTDDI_VERSION >= NTDDI_LONGHORN + +#else + UNUSED_PARAM(dpc_ctx); +#endif + + KeInitializeSpinLock( &pdev->isr_lock ); + + status = IoConnectInterrupt( + int_obj, /* InterruptObject */ + isr, /* ISR */ + isr_ctx, /* ISR context */ + &pdev->isr_lock, /* spinlock */ + vector, /* interrupt vector */ + (KIRQL)pdev->int_info.u.Interrupt.Level, /* IRQL */ + (KIRQL)pdev->int_info.u.Interrupt.Level, /* Synchronize IRQL */ + (BOOLEAN)((pdev->int_info.Flags == CM_RESOURCE_INTERRUPT_LATCHED) ? + Latched : LevelSensitive), /* interrupt type: LATCHED or LEVEL */ + (BOOLEAN)(pdev->int_info.ShareDisposition == CmResourceShareShared), /* vector shared or not */ + g_processor_affinity ? g_processor_affinity : (KAFFINITY)pdev->int_info.u.Interrupt.Affinity, /* interrupt affinity */ + FALSE /* whether to save Float registers */ + ); + + if (!NT_SUCCESS(status)) { + MLX4_PRINT(TRACE_LEVEL_ERROR ,MLX4_DBG_INIT ,("IoConnectInterrupt failed status %d (did you change the processor_affinity ? )\n",status)); + return -EFAULT; /* failed to connect interrupt */ + } + + /* init DPC stuff */ + pdev->dpc_lock = 0; + for (i = 0; i < MLX4_NUM_EQ; ++i) { + spin_lock_init( &priv->eq_table.eq[i].lock ); + KeInitializeDpc( &priv->eq_table.eq[i].dpc, dpc, &priv->eq_table.eq[i]); + priv->eq_table.eq[i].eq_ix = i; + } + + return 0; +} +#endif diff --git a/branches/ConnectX/hw/mlx4/kernel/core/l2w_debug.c b/branches/ConnectX/hw/mlx4/kernel/core/l2w_debug.c new file mode 100644 index 00000000..551261df --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/core/l2w_debug.c @@ -0,0 +1,205 @@ +#include "l2w.h" +#include "ev_log.h" + +#define MAX_BUFFER_SIZE 256 + +VOID +WriteEventLogEntryStr( + PVOID pi_pIoObject, + ULONG pi_ErrorCode, + ULONG pi_UniqueErrorCode, + ULONG pi_FinalStatus, + PWCHAR pi_InsertionStr, + ULONG pi_nDataItems, + ... + ) +/*++ + +Routine Description: + Writes an event log entry to the event log. + +Arguments: + + pi_pIoObject......... The IO object ( driver object or device object ). + pi_ErrorCode......... The error code. + pi_UniqueErrorCode... A specific error code. + pi_FinalStatus....... The final status. + pi_nDataItems........ Number of data items. + . + . data items values + . + +Return Value: + + None . + +--*/ +{ /* WriteEventLogEntryStr */ + + /* Variable argument list */ + va_list l_Argptr; + /* Pointer to an error log entry */ + PIO_ERROR_LOG_PACKET l_pErrorLogEntry; + /* sizeof insertion string */ + int l_Size = (int)((pi_InsertionStr) ? 
((wcslen(pi_InsertionStr) + 1) * sizeof( WCHAR )) : 0); + int l_PktSize =sizeof(IO_ERROR_LOG_PACKET)+pi_nDataItems*sizeof(ULONG); + int l_TotalSize =l_PktSize +l_Size; + + /* Init the variable argument list */ + va_start(l_Argptr, pi_nDataItems); + + /* Allocate an error log entry */ + if (l_TotalSize >= ERROR_LOG_MAXIMUM_SIZE - 2) + l_TotalSize = ERROR_LOG_MAXIMUM_SIZE - 2; + l_pErrorLogEntry = (PIO_ERROR_LOG_PACKET)IoAllocateErrorLogEntry( + pi_pIoObject, (UCHAR)l_TotalSize ); + + /* Check allocation */ + if ( l_pErrorLogEntry != NULL) + { /* OK */ + + /* Data item index */ + USHORT l_nDataItem ; + + /* Set the error log entry header */ + l_pErrorLogEntry->ErrorCode = pi_ErrorCode; + l_pErrorLogEntry->DumpDataSize = (USHORT) (pi_nDataItems*sizeof(ULONG)); + l_pErrorLogEntry->SequenceNumber = 0; + l_pErrorLogEntry->MajorFunctionCode = 0; + l_pErrorLogEntry->IoControlCode = 0; + l_pErrorLogEntry->RetryCount = 0; + l_pErrorLogEntry->UniqueErrorValue = pi_UniqueErrorCode; + l_pErrorLogEntry->FinalStatus = pi_FinalStatus; + + /* Insert the data items */ + for (l_nDataItem = 0; l_nDataItem < pi_nDataItems; l_nDataItem++) + { /* Inset a data item */ + + /* Current data item */ + int l_CurDataItem ; + + /* Get next data item */ + l_CurDataItem = va_arg( l_Argptr, int); + + /* Put it into the data array */ + l_pErrorLogEntry->DumpData[l_nDataItem] = l_CurDataItem ; + + } /* Inset a data item */ + + /* add insertion string */ + if (pi_InsertionStr) { + char *ptr; + int sz = min( l_TotalSize - l_PktSize, l_Size ); + l_pErrorLogEntry->NumberOfStrings = 1; + l_pErrorLogEntry->StringOffset = sizeof(IO_ERROR_LOG_PACKET) + l_pErrorLogEntry->DumpDataSize; + ptr = (char*)l_pErrorLogEntry + l_pErrorLogEntry->StringOffset; + memcpy( ptr, pi_InsertionStr, sz ); + *(WCHAR*)&ptr[sz - 2] = (WCHAR)0; + } + + /* Write the packet */ + IoWriteErrorLogEntry(l_pErrorLogEntry); + + } /* OK */ + + /* Term the variable argument list */ + va_end(l_Argptr); + +} /* WriteEventLogEntry */ + + +VOID +mlx4_err( + IN struct mlx4_dev * mdev, + IN char* format, + ... + ) +{ + va_list list; + UCHAR buf[MAX_BUFFER_SIZE]; + WCHAR wbuf[MAX_BUFFER_SIZE]; + + // print to Debugger + va_start(list, format); + buf[MAX_BUFFER_SIZE - 1] = '\0'; + RtlStringCbVPrintfA( (char*)buf, sizeof(buf), format, list); + DbgPrint( (char*)buf ); + va_end(list); + + // print to Event Log + if (!RtlStringCchPrintfW(wbuf, sizeof(wbuf), L"%S", buf)) + WriteEventLogEntryStr( mdev->pdev->p_self_do, (ULONG)EVENT_MLX4_ANY_ERROR, 0, 0, wbuf, 0, 0 ); +} + +VOID +mlx4_dbg( + IN struct mlx4_dev * mdev, + IN char* format, + ... + ) +{ +#if DBG + va_list list; + UCHAR buf[MAX_BUFFER_SIZE]; + UNUSED_PARAM(mdev); + + // print to Debugger + va_start(list, format); + buf[MAX_BUFFER_SIZE - 1] = '\0'; + RtlStringCbVPrintfA( (char*)buf, sizeof(buf), format, list); + DbgPrint( (char*)buf ); + va_end(list); +#else + UNUSED_PARAM(mdev); + UNUSED_PARAM(format); +#endif //DBG +} + +VOID +dev_err( + IN struct mlx4_dev ** mdev, + IN char* format, + ... + ) +{ + va_list list; + UCHAR buf[MAX_BUFFER_SIZE]; + WCHAR wbuf[MAX_BUFFER_SIZE]; + + // print to Debugger + va_start(list, format); + buf[MAX_BUFFER_SIZE - 1] = '\0'; + RtlStringCbVPrintfA( (char*)buf, sizeof(buf), format, list); + DbgPrint( (char*)buf ); + va_end(list); + + // print to Event Log + RtlStringCchPrintfW(wbuf, sizeof(wbuf), L"%S", buf); + WriteEventLogEntryStr( (*mdev)->pdev->p_self_do, (ULONG)EVENT_MLX4_ANY_ERROR, 0, 0, wbuf, 0, 0 ); +} + +VOID +dev_info( + IN struct mlx4_dev ** p_mdev, + IN char* format, + ... 
+ ) +{ +#if DBG + va_list list; + UCHAR buf[MAX_BUFFER_SIZE]; + UNUSED_PARAM(p_mdev); + + // print to Debugger + va_start(list, format); + buf[MAX_BUFFER_SIZE - 1] = '\0'; + RtlStringCbVPrintfA( (char*)buf, sizeof(buf), format, list); + DbgPrint( (char*)buf ); + va_end(list); +#else + UNUSED_PARAM(p_mdev); + UNUSED_PARAM(format); +#endif +} + + diff --git a/branches/ConnectX/hw/mlx4/kernel/core/l2w_memory.c b/branches/ConnectX/hw/mlx4/kernel/core/l2w_memory.c new file mode 100644 index 00000000..bb0f8851 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/core/l2w_memory.c @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: mt_memory.c 2020 2007-05-01 09:29:10Z leonid $ + */ +#include "l2w.h" + +#if defined (EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "l2w_memory.tmh" +#endif + + + +void *alloc_cont_mem( + IN struct pci_dev *pdev, + IN unsigned long size, + OUT dma_addr_t*p_dma_addr) +{ + void *va; + DMA_ADAPTER *p_adapter = pdev->p_dma_adapter; + PHYSICAL_ADDRESS pa = {0}; + + ASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL); + + memset( p_dma_addr, 0, sizeof(dma_addr_t) ); + + if (!size) + return NULL; + + va = p_adapter->DmaOperations->AllocateCommonBuffer( + p_adapter, (ULONG)size, &pa, FALSE ); + if (va) { + p_dma_addr->da = pa.QuadPart; + p_dma_addr->va = va; + p_dma_addr->sz = (ULONG)size; + } + + return va; +} + +void free_cont_mem( + IN struct pci_dev *pdev, + IN dma_addr_t*p_dma_addr) +{ + PHYSICAL_ADDRESS pa; + DMA_ADAPTER *p_adapter = pdev->p_dma_adapter; + + ASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL); + + pa.QuadPart = p_dma_addr->da; + p_adapter->DmaOperations->FreeCommonBuffer( + p_adapter, (ULONG)p_dma_addr->sz, pa, p_dma_addr->va, FALSE ); +} + +void * +dma_alloc_coherent( struct mlx4_dev **dev, size_t size, + dma_addr_t *p_dma_addr, gfp_t gfp ) +{ + UNUSED_PARAM(gfp); + + if (!size) + return NULL; + return alloc_cont_mem( (*dev)->pdev, (unsigned long)size, p_dma_addr ); +} + +void +dma_free_coherent( struct mlx4_dev **dev, size_t size, + void *vaddr, dma_addr_t dma_addr) +{ + UNUSED_PARAM(size); + UNUSED_PARAM(vaddr); + ASSERT(size == dma_addr.sz); + ASSERT(vaddr == dma_addr.va); + free_cont_mem( (*dev)->pdev, &dma_addr ); +} + +void +pci_free_consistent( struct pci_dev *pdev, size_t size, + void *vaddr, dma_addr_t dma_addr) +{ + dma_free_coherent( &pdev->dev, size, vaddr, dma_addr ); +} + + + diff --git a/branches/ConnectX/hw/mlx4/kernel/core/l2w_radix.c b/branches/ConnectX/hw/mlx4/kernel/core/l2w_radix.c new file mode 100644 index 00000000..abea412c --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/core/l2w_radix.c @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: radix.c 1611 2006-08-20 14:48:55Z sleybo $ + */ + +#include "l2w.h" +#include "errno.h" + +int radix_tree_insert(struct radix_tree_root *root, + unsigned long index, void *item) +{ + if ( NULL == cl_map_insert( &root->map, (const uint64_t)index, item ) ) + return -EFAULT; + return 0; +} + +void *radix_tree_lookup(struct radix_tree_root *root, + unsigned long index) +{ + void* item = cl_map_get( &root->map, (const uint64_t)index ); + return item; +} + +void *radix_tree_delete(struct radix_tree_root *root, + unsigned long index) +{ + void* item = cl_map_remove( &root->map, (const uint64_t)index ); + return item; +} + +cl_status_t radix_tree_create(struct radix_tree_root *root, + gfp_t gfp_mask) +{ +#define MIN_ITEMS 32 + cl_status_t cl_status; + UNUSED_PARAM(gfp_mask); + + cl_map_construct( &root->map ); + cl_status = cl_map_init( &root->map, MIN_ITEMS ); + return cl_status; +} + +void radix_tree_destroy(struct radix_tree_root *root ) +{ + cl_map_destroy( &root->map ); +} + diff --git a/branches/ConnectX/hw/mlx4/kernel/core/l2w_umem.c b/branches/ConnectX/hw/mlx4/kernel/core/l2w_umem.c new file mode 100644 index 00000000..fe449c4e --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/core/l2w_umem.c @@ -0,0 +1,164 @@ + +#include "l2w.h" +#include "ib_verbs.h" + +/** + * ib_umem_release - release memory pinned with ib_umem_get + * @umem: umem struct to release + */ +void ib_umem_release(struct ib_umem *p_ib_umem) +{ + MLX4_ENTER(MLX4_DBG_MEMORY); + if (p_ib_umem->secure_handle) + MmUnsecureVirtualMemory( p_ib_umem->secure_handle ); + if (p_ib_umem->iobuf_used) + iobuf_deregister_with_cash(&p_ib_umem->iobuf); + kfree(p_ib_umem); + MLX4_EXIT(MLX4_DBG_MEMORY); +} + + +/** + * ib_umem_get - Pin and DMA map userspace memory. + * @context: userspace context to pin memory for + * @addr: userspace virtual address to start at + * @size: length of region to pin + * @access: IB_ACCESS_xxx flags for memory being pinned + */ +struct ib_umem *ib_umem_get(struct ib_ucontext *context, u64 addr, + size_t size, enum ib_access_flags access) +{ + int err; + struct ib_umem *p_ib_umem; + + MLX4_ENTER(MLX4_DBG_MEMORY); + + // create the object + p_ib_umem = kzalloc(sizeof *p_ib_umem, GFP_KERNEL); + if (!p_ib_umem) + goto done; + + p_ib_umem->p_uctx = context; + p_ib_umem->page_size = PAGE_SIZE; + + // register the memory + iobuf_init( addr, (u64)size, !!context, &p_ib_umem->iobuf); + err = iobuf_register_with_cash( addr, (u64)size, !!context, + &access, &p_ib_umem->iobuf ); + if (err) + goto err_reg_mem; + p_ib_umem->iobuf_used = TRUE; + + // TODO: map the memory for DMA + + // secure memory + if (!context) + goto done; + __try { + p_ib_umem->secure_handle = MmSecureVirtualMemory ( + (PVOID)(ULONG_PTR)addr, size, + (access & IB_ACCESS_LOCAL_WRITE) ? 
PAGE_READWRITE : PAGE_READONLY ); + if (p_ib_umem->secure_handle == NULL) + goto err_secure; + } + __except (EXCEPTION_EXECUTE_HANDLER) { + NTSTATUS Status = GetExceptionCode(); + UNUSED_PARAM_WOWPP(Status); + MLX4_PRINT(TRACE_LEVEL_ERROR ,MLX4_DBG_MEMORY , + ("Exception 0x%x on MmSecureVirtualMemory(), addr %I64x, size %I64x, access %#x\n", + Status, addr, (u64)size, access )); + goto err_secure; + } + goto done; + +err_secure: + iobuf_deregister(&p_ib_umem->iobuf); + +err_reg_mem: + kfree(p_ib_umem); + p_ib_umem = NULL; + +done: + MLX4_EXIT(MLX4_DBG_MEMORY); + return p_ib_umem; +} + +int ib_umem_page_count(struct ib_umem *p_ib_umem) +{ + return (int)p_ib_umem->iobuf.nr_pages; +} + +dma_addr_t ib_umem_get_dma(struct ib_umem *p_ib_umem) +{ + u64 pages[1] = { 0 }; + iobuf_iter_t iobuf_iter; + dma_addr_t dma_addr = { 0, 0 , 0 }; + + iobuf_iter_init( &p_ib_umem->iobuf, &iobuf_iter ); + iobuf_get_tpt_seg( &p_ib_umem->iobuf, &iobuf_iter, 1, pages ); + // TODO: convert phys address to DMA one + dma_addr.da = pages[0]; + + return dma_addr; +} + + +// Returns: 0 on success, -ENOMEM or -EACCESS or -EFAULT on error +int ib_umem_map( + IN u64 va, + IN u64 size, + IN ib_access_t acc, + OUT PMDL *mdl, + OUT void **kva) +{ + PMDL p_mdl; + int rc = 0; + LOCK_OPERATION lock_op = (acc & IB_AC_LOCAL_WRITE) ? IoModifyAccess : IoReadAccess; + + p_mdl = IoAllocateMdl( (PVOID)(ULONG_PTR)va, (ULONG)size, FALSE,FALSE,NULL); + if (p_mdl == NULL) { + rc = -ENOMEM; + goto err_alloc_mdl; + } + + __try { + MmProbeAndLockPages( p_mdl, UserMode, lock_op ); /* lock memory */ + } + __except (EXCEPTION_EXECUTE_HANDLER) { + MLX4_PRINT(TRACE_LEVEL_ERROR, MLX4_DBG_MEMORY, + ("MOSAL_iobuf_register: Exception 0x%x on MmProbeAndLockPages(), va %I64d, sz %I64d\n", + GetExceptionCode(), va, size)); + rc = -EACCES; + goto err_probe; + } + + *kva = MmMapLockedPagesSpecifyCache( p_mdl, + KernelMode, MmNonCached, NULL, FALSE, NormalPagePriority ); + if (*kva == NULL) { + MLX4_PRINT(TRACE_LEVEL_ERROR ,MLX4_DBG_MEMORY ,("MmMapLockedPagesSpecifyCache failed\n")); + rc = -EFAULT; + goto err_map; + } + + *mdl = p_mdl; + return 0; + +err_map: + MmUnlockPages(p_mdl); +err_probe: + IoFreeMdl(p_mdl); +err_alloc_mdl: + return rc; +} + +void ib_umem_unmap( + IN PMDL p_mdl, + IN void *kva) +{ + if (kva) { + MmUnmapLockedPages( kva, p_mdl ); + MmUnlockPages(p_mdl); + IoFreeMdl(p_mdl); + } +} + diff --git a/branches/ConnectX/hw/mlx4/kernel/core/makefile b/branches/ConnectX/hw/mlx4/kernel/core/makefile new file mode 100644 index 00000000..a0c06273 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/core/makefile @@ -0,0 +1,7 @@ +# +# DO NOT EDIT THIS FILE!!! Edit .\sources. if you want to add a new source +# file to this component. This file merely indirects to the real make file +# that is shared by all the driver components of the OpenIB Windows project. +# + +!INCLUDE ..\..\..\..\inc\openib.def diff --git a/branches/ConnectX/hw/mlx4/kernel/core/pa_cash.c b/branches/ConnectX/hw/mlx4/kernel/core/pa_cash.c new file mode 100644 index 00000000..b6826457 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/core/pa_cash.c @@ -0,0 +1,366 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: mlnx_uvp_cq.c 1611 2006-08-20 14:48:55Z sleybo $ + */ + +#include "l2w.h" +#include "pa_cash.h" + +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "pa_cash.tmh" +#endif + +/////////////////////////////////////////////////////////////////////////// +// +// RESTRICTIONS +// +/////////////////////////////////////////////////////////////////////////// + +#ifdef _WIN64 +#define MAX_PAGES_SUPPORTED (64 * 1024 * 1024) // 256 GB +#else +#define MAX_PAGES_SUPPORTED (16 * 1024 * 1024) // 64 GB +#endif + +#define FREE_LIST_TRESHOLD 256 // max number of pages in free list + +/////////////////////////////////////////////////////////////////////////// +// +// CONSTANTS +// +/////////////////////////////////////////////////////////////////////////// + +#define PA_TABLE_ENTRY_SIZE sizeof(pa_table_entry_t) +#define PA_TABLE_ENTRY_NUM (PAGE_SIZE / PA_TABLE_ENTRY_SIZE) +#define PA_TABLE_SIZE (PA_TABLE_ENTRY_SIZE * PA_TABLE_ENTRY_NUM) + +#define PA_DIR_ENTRY_SIZE sizeof(pa_dir_entry_t) +#define PA_DIR_ENTRY_NUM (MAX_PAGES_SUPPORTED /PA_TABLE_ENTRY_NUM) +#define PA_DIR_SIZE (PA_DIR_ENTRY_SIZE * PA_DIR_ENTRY_NUM) + + +/////////////////////////////////////////////////////////////////////////// +// +// STRUCTURES +// +/////////////////////////////////////////////////////////////////////////// + +typedef struct { + int ref_cnt; +} pa_table_entry_t; + +typedef struct { + pa_table_entry_t *pa_te; /* pointer to one page of pa_table_entry_t elements */ + int used; /* number of pa_table_entry_t elements, used now. 
When 0 - the page may be freed */ +} pa_dir_entry_t; + +typedef struct pa_cash_s { + pa_dir_entry_t *pa_dir; + SINGLE_LIST_ENTRY free_list_hdr; + uint32_t free_nr_pages; + uint32_t free_list_threshold; + uint32_t max_nr_pages; + uint32_t cur_nr_pages; +} pa_cash_t; + + + +/////////////////////////////////////////////////////////////////////////// +// +// GLOBALS +// +/////////////////////////////////////////////////////////////////////////// + +DEFINE_MUTEX(g_pa_mutex); +u64 g_pa[1024]; +pa_cash_t g_cash; + + +/////////////////////////////////////////////////////////////////////////// +// +// STATIC FUNCTIONS +// +/////////////////////////////////////////////////////////////////////////// + +static uint32_t pa_calc_threshold() +{ + // threshold expresses the max length of free pages list, which gets released only at driver unload time + // so it can be calculated to be proportional to the system memory size + return FREE_LIST_TRESHOLD; +} + +static pa_table_entry_t *pa_alloc_page() +{ + pa_table_entry_t *pa_te; + + /* take from the list of reserved if it is not empty */ + if (g_cash.free_nr_pages) { + pa_te = (pa_table_entry_t *)PopEntryList( &g_cash.free_list_hdr ); + ((SINGLE_LIST_ENTRY*)pa_te)->Next = NULL; + g_cash.free_nr_pages--; + } + else /* allocate new page */ + pa_te = (pa_table_entry_t *)kzalloc( PA_TABLE_SIZE, GFP_KERNEL ); + + return pa_te; +} + +static void pa_free_page(pa_table_entry_t *pa_te) +{ + if (g_cash.free_nr_pages < g_cash.free_list_threshold) { + PushEntryList( &g_cash.free_list_hdr, (SINGLE_LIST_ENTRY*)pa_te ); + g_cash.free_nr_pages++; + } + else + kfree(pa_te); +} + +static pa_table_entry_t * pa_get_page(uint32_t ix) +{ + pa_table_entry_t *pa_te = g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te; + + /* no this page_table - add a new one */ + if (!pa_te) { + pa_te = pa_alloc_page(); + if (!pa_te) + return NULL; + g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te = pa_te; + g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used = 0; + g_cash.cur_nr_pages++; + } + + return pa_te; +} + +static void pa_put_page(uint32_t ix) +{ + pa_free_page(g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te); + g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te = NULL; + g_cash.cur_nr_pages--; +} + +static int pa_add_pa(uint64_t pa) +{ + uint32_t ix = (uint32_t)(pa >> PAGE_SHIFT); + pa_table_entry_t *pa_te; + + /* or pa is incorrect or memory that big is not supported */ + if (ix > g_cash.max_nr_pages) { + ASSERT(FALSE); + return -EFAULT; + } + + /* get page address */ + pa_te = pa_get_page(ix); + if (!pa_te) + return -ENOMEM; + + /* register page address */ + if (!pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt) + ++g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used; + ++pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt; + + return 0; +} + + +static int pa_rmv_pa(uint64_t pa) +{ + uint32_t ix = (uint32_t)(pa >> PAGE_SHIFT); + pa_table_entry_t *pa_te; + + /* or pa is incorrect or memory that big is not supported */ + if (ix > g_cash.max_nr_pages) { + ASSERT(FALSE); + return -EFAULT; + } + + pa_te = g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te; + + /* no this page_table - error*/ + if (!pa_te) { + ASSERT(FALSE); + return -EFAULT; + } + + /* deregister page address */ + --pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt; + ASSERT(pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt >= 0); + + /* release the page on need */ + if (!pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt) + --g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used; + if (!g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used) + pa_put_page(ix); + + return 0; +} + + + 
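+/*
+ * Illustrative sketch: the helpers above implement a two-level reference-count
+ * cache keyed by physical page frame number.  Assuming 4KB pages and the 4-byte
+ * pa_table_entry_t defined above, each second-level table page holds
+ * PA_TABLE_ENTRY_NUM = PAGE_SIZE / sizeof(pa_table_entry_t) entries, and a
+ * physical address pa resolves as:
+ *
+ *     uint32_t          ix  = (uint32_t)(pa >> PAGE_SHIFT);           // page frame number
+ *     pa_dir_entry_t   *dir = &g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM];
+ *     pa_table_entry_t *te  = &dir->pa_te[ix % PA_TABLE_ENTRY_NUM];   // te->ref_cnt = pin count
+ *
+ * dir->used counts entries in that table with a non-zero ref_cnt; when it drops
+ * to zero, pa_put_page() returns the second-level page to the free list.
+ */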
+/////////////////////////////////////////////////////////////////////////// +// +// PUBLIC FUNCTIONS +// +/////////////////////////////////////////////////////////////////////////// + + +int pa_register(iobuf_t *iobuf_p) +{ + int i,j,n; + iobuf_iter_t iobuf_iter; + + iobuf_iter_init( iobuf_p, &iobuf_iter ); + n = 0; + for (;;) { + i = iobuf_get_tpt_seg( iobuf_p, &iobuf_iter, + sizeof(g_pa) / sizeof (u64), g_pa ); + if (!i) + break; + for (j=0; j> PAGE_SHIFT); + pa_table_entry_t *pa_te; + + /* or pa is incorrect or memory that big is not supported */ + if (ix > g_cash.max_nr_pages) { + ASSERT(FALSE); + return -EFAULT; + } + + pa_te = g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te; + + /* no this page_table */ + if (!pa_te) + return 0; + + return pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt; +} + +int pa_cash_init() +{ + void *pa_dir; + pa_dir = kzalloc(PA_DIR_SIZE, GFP_KERNEL); + + if (!pa_dir) + return -ENOMEM; + g_cash.pa_dir = pa_dir; + g_cash.max_nr_pages = PA_TABLE_ENTRY_NUM * PA_DIR_ENTRY_NUM; + g_cash.free_list_hdr.Next = NULL; + g_cash.cur_nr_pages = 0; + g_cash.free_nr_pages = 0; + g_cash.free_list_threshold = pa_calc_threshold(); + mutex_init(&g_pa_mutex); + return 0; +} + diff --git a/branches/ConnectX/hw/mlx4/kernel/core/pa_cash.h b/branches/ConnectX/hw/mlx4/kernel/core/pa_cash.h new file mode 100644 index 00000000..a231c890 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/core/pa_cash.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: mlnx_uvp_cq.c 1611 2006-08-20 14:48:55Z sleybo $ + */ + +#include "iobuf.h" + +extern struct mutex g_pa_mutex; + +int pa_cash_init(); + +void pa_cash_release(); + +int pa_is_registered(uint64_t pa); + +int pa_register(iobuf_t *iobuf_p); + +void pa_deregister(iobuf_t *iobuf_p); + +void pa_cash_print(); + diff --git a/branches/ConnectX/hw/mlx4/kernel/core/packer.c b/branches/ConnectX/hw/mlx4/kernel/core/packer.c new file mode 100644 index 00000000..5b0da952 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/core/packer.c @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2004 Topspin Corporation. All rights reserved. 
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: packer.c 1349 2004-12-16 21:09:43Z roland $ + */ + +#include "l2w.h" +#include "ib_pack.h" + +static u64 value_read(int offset, int size, u8 *structure) +{ + switch (size) { + case 1: return *(u8 *) (structure + offset); + case 2: return be16_to_cpup((__be16 *) (structure + offset)); + case 4: return be32_to_cpup((__be32 *) (structure + offset)); + case 8: return be64_to_cpup((__be64 *) (structure + offset)); + default: + printk(KERN_WARNING "Field size %d bits not handled\n", size * 8); + return 0; + } +} + +/** + * ib_pack - Pack a structure into a buffer + * @desc:Array of structure field descriptions + * @desc_len:Number of entries in @desc + * @structure:Structure to pack from + * @buf:Buffer to pack into + * + * ib_pack() packs a list of structure fields into a buffer, + * controlled by the array of fields in @desc. 
+ */ +void ib_pack(const struct ib_field *desc, + int desc_len, + void *structure, + u8 *buf) +{ + int i; + + for (i = 0; i < desc_len; ++i) { + if (desc[i].size_bits <= 32) { + int shift; + u32 val; + __be32 mask; + __be32 *addr; + + shift = 32 - desc[i].offset_bits - desc[i].size_bits; + if (desc[i].struct_size_bytes) + val = (u32)(value_read((int)desc[i].struct_offset_bytes, + (int)desc[i].struct_size_bytes, + structure) << shift); + else + val = 0; + + mask = cpu_to_be32(((1ull << desc[i].size_bits) - 1) << shift); + addr = (__be32 *) buf + desc[i].offset_words; + *addr = (*addr & ~mask) | (cpu_to_be32(val) & mask); + } else if (desc[i].size_bits <= 64) { + int shift; + u64 val; + __be64 mask; + __be64 *addr; + + shift = 64 - desc[i].offset_bits - desc[i].size_bits; + if (desc[i].struct_size_bytes) + val = value_read((int)desc[i].struct_offset_bytes, + (int)desc[i].struct_size_bytes, + structure) << shift; + else + val = 0; + + mask = cpu_to_be64((~0ull >> (64 - desc[i].size_bits)) << shift); + addr = (__be64 *) ((__be32 *) buf + desc[i].offset_words); + *addr = (*addr & ~mask) | (cpu_to_be64(val) & mask); + } else { + if (desc[i].offset_bits % 8 || + desc[i].size_bits % 8) { + printk(KERN_WARNING "Structure field %s of size %d " + "bits is not byte-aligned\n", + desc[i].field_name, desc[i].size_bits); + } + + if (desc[i].struct_size_bytes) + memcpy(buf + desc[i].offset_words * 4 + + desc[i].offset_bits / 8, + (u8*)structure + desc[i].struct_offset_bytes, + desc[i].size_bits / 8); + else + memset(buf + desc[i].offset_words * 4 + + desc[i].offset_bits / 8, + 0, + desc[i].size_bits / 8); + } + } +} +EXPORT_SYMBOL(ib_pack); + +static void value_write(int offset, int size, u64 val, u8 *structure) +{ + switch (size * 8) { + case 8: *( u8 *) (structure + offset) = (u8)val; break; + case 16: *(__be16 *) (structure + offset) = cpu_to_be16(val); break; + case 32: *(__be32 *) (structure + offset) = cpu_to_be32(val); break; + case 64: *(__be64 *) (structure + offset) = cpu_to_be64(val); break; + default: + printk(KERN_WARNING "Field size %d bits not handled\n", size * 8); + } +} + +/** + * ib_unpack - Unpack a buffer into a structure + * @desc:Array of structure field descriptions + * @desc_len:Number of entries in @desc + * @buf:Buffer to unpack from + * @structure:Structure to unpack into + * + * ib_pack() unpacks a list of structure fields from a buffer, + * controlled by the array of fields in @desc. 
+ */ +void ib_unpack(const struct ib_field *desc, + int desc_len, + void *buf, + void *structure) +{ + int i; + + for (i = 0; i < desc_len; ++i) { + if (!desc[i].struct_size_bytes) + continue; + + if (desc[i].size_bits <= 32) { + int shift; + u32 val; + u32 mask; + __be32 *addr; + + shift = 32 - desc[i].offset_bits - desc[i].size_bits; + mask = ((1ull << desc[i].size_bits) - 1) << shift; + addr = (__be32 *) buf + desc[i].offset_words; + val = (be32_to_cpup(addr) & mask) >> shift; + value_write((int)desc[i].struct_offset_bytes, + (int)desc[i].struct_size_bytes, + val, + structure); + } else if (desc[i].size_bits <= 64) { + int shift; + u64 val; + u64 mask; + __be64 *addr; + + shift = 64 - desc[i].offset_bits - desc[i].size_bits; + mask = (~0ull >> (64 - desc[i].size_bits)) << shift; + addr = (__be64 *) buf + desc[i].offset_words; + val = (be64_to_cpup(addr) & mask) >> shift; + value_write((int)desc[i].struct_offset_bytes, + (int)desc[i].struct_size_bytes, + val, + structure); + } else { + if (desc[i].offset_bits % 8 || + desc[i].size_bits % 8) { + printk(KERN_WARNING "Structure field %s of size %d " + "bits is not byte-aligned\n", + desc[i].field_name, desc[i].size_bits); + } + + memcpy((u8*)structure + desc[i].struct_offset_bytes, + (u8*)buf + desc[i].offset_words * 4 + + desc[i].offset_bits / 8, + desc[i].size_bits / 8); + } + } +} +EXPORT_SYMBOL(ib_unpack); diff --git a/branches/ConnectX/hw/mlx4/kernel/core/ud_header.c b/branches/ConnectX/hw/mlx4/kernel/core/ud_header.c new file mode 100644 index 00000000..4be128e2 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/core/ud_header.c @@ -0,0 +1,281 @@ +/* + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: ud_header.c 1349 2004-12-16 21:09:43Z roland $ + */ + +#include "l2w.h" +#include "ib_pack.h" + +#define STRUCT_FIELD(header, field) \ + .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \ + .struct_size_bytes = sizeof ((struct ib_unpacked_ ## header *) 0)->field, \ + .field_name = #header ":" #field + +#define STRUCT_FIELD_INIT(header, field,ow,ob,sb) \ + offsetof(struct ib_unpacked_ ## header, field), \ + sizeof ((struct ib_unpacked_ ## header *) 0)->field, \ + ow,ob,sb, \ + #header ":" #field + +#define STRUCT_FIELD_INITR(ow,ob,sb) \ + 0, 0, ow, ob, sb, "reserved" + +static const struct ib_field lrh_table[] = { + { STRUCT_FIELD_INIT(lrh, virtual_lane, 0, 0, 4) }, + { STRUCT_FIELD_INIT(lrh, link_version, 0, 4, 4) }, + { STRUCT_FIELD_INIT(lrh, service_level, 0, 8, 4) }, + { STRUCT_FIELD_INITR(0,12,2) }, + { STRUCT_FIELD_INIT(lrh, link_next_header, 0, 14, 2) }, + { STRUCT_FIELD_INIT(lrh, destination_lid, 0, 16, 16) }, + { STRUCT_FIELD_INITR(1,0,5) }, + { STRUCT_FIELD_INIT(lrh, packet_length, 1, 5, 11) }, + { STRUCT_FIELD_INIT(lrh, source_lid, 1, 16, 16) } +}; + +static const struct ib_field grh_table[] = { + { STRUCT_FIELD_INIT(grh, ip_version, 0, 0, 4) }, + { STRUCT_FIELD_INIT(grh, traffic_class, 0, 4, 8) }, + { STRUCT_FIELD_INIT(grh, flow_label, 0, 12, 20) }, + { STRUCT_FIELD_INIT(grh, payload_length, 1, 0, 16) }, + { STRUCT_FIELD_INIT(grh, next_header, 1, 16, 8) }, + { STRUCT_FIELD_INIT(grh, hop_limit, 1, 24, 8) }, + { STRUCT_FIELD_INIT(grh, source_gid, 2, 0, 128) }, + { STRUCT_FIELD_INIT(grh, destination_gid, 6, 0, 128) } +}; + +static const struct ib_field bth_table[] = { + { STRUCT_FIELD_INIT(bth, opcode, 0, 0, 8) }, + { STRUCT_FIELD_INIT(bth, solicited_event, 0, 8, 1) }, + { STRUCT_FIELD_INIT(bth, mig_req, 0, 9, 1) }, + { STRUCT_FIELD_INIT(bth, pad_count, 0, 10, 2) }, + { STRUCT_FIELD_INIT(bth, transport_header_version, 0, 12, 4) }, + { STRUCT_FIELD_INIT(bth, pkey, 0, 16, 16) }, + { STRUCT_FIELD_INITR(1,0,8) }, + { STRUCT_FIELD_INIT(bth, destination_qpn, 1, 8, 24) }, + { STRUCT_FIELD_INIT(bth, ack_req, 2, 0, 1) }, + { STRUCT_FIELD_INITR(2,1,7) }, + { STRUCT_FIELD_INIT(bth, psn, 2, 8, 24) } +}; + +static const struct ib_field deth_table[] = { + { STRUCT_FIELD_INIT(deth, qkey, 0, 0, 32) }, + { STRUCT_FIELD_INITR(1,0,8) }, + { STRUCT_FIELD_INIT(deth, source_qpn, 1, 8, 24) } +}; + +/** + * ib_ud_header_init - Initialize UD header structure + * @payload_bytes:Length of packet payload + * @grh_present:GRH flag (if non-zero, GRH will be included) + * @header:Structure to initialize + * + * ib_ud_header_init() initializes the lrh.link_version, lrh.link_next_header, + * lrh.packet_length, grh.ip_version, grh.payload_length, + * grh.next_header, bth.opcode, bth.pad_count and + * bth.transport_header_version fields of a &struct ib_ud_header given + * the payload length and whether a GRH will be included. + */ +void ib_ud_header_init(int payload_bytes, + int grh_present, + struct ib_ud_header *header) +{ + int header_len; + u16 packet_length; + + memset(header, 0, sizeof *header); + + header_len = + IB_LRH_BYTES + + IB_BTH_BYTES + + IB_DETH_BYTES; + if (grh_present) { + header_len += IB_GRH_BYTES; + } + + header->lrh.link_version = 0; + header->lrh.link_next_header = + grh_present ? 
IB_LNH_IBA_GLOBAL : IB_LNH_IBA_LOCAL; + packet_length = (u16)((IB_LRH_BYTES + + IB_BTH_BYTES + + IB_DETH_BYTES + + payload_bytes + + 4 + /* ICRC */ + 3) / 4); /* round up */ + + header->grh_present = grh_present; + if (grh_present) { + packet_length += IB_GRH_BYTES / 4; + header->grh.ip_version = 6; + header->grh.payload_length = + cpu_to_be16((IB_BTH_BYTES + + IB_DETH_BYTES + + payload_bytes + + 4 + /* ICRC */ + 3) & ~3); /* round up */ + header->grh.next_header = 0x1b; + } + + header->lrh.packet_length = cpu_to_be16(packet_length); + + if (header->immediate_present) + header->bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; + else + header->bth.opcode = IB_OPCODE_UD_SEND_ONLY; + header->bth.pad_count = (u8)((4 - payload_bytes) & 3); + header->bth.transport_header_version = 0; +} +EXPORT_SYMBOL(ib_ud_header_init); + +/** + * ib_ud_header_pack - Pack UD header struct into wire format + * @header:UD header struct + * @buf:Buffer to pack into + * + * ib_ud_header_pack() packs the UD header structure @header into wire + * format in the buffer @buf. + */ +int ib_ud_header_pack(struct ib_ud_header *header, + u8 *buf) +{ + int len = 0; + + ib_pack(lrh_table, ARRAY_SIZE(lrh_table), + &header->lrh, buf); + len += IB_LRH_BYTES; + + if (header->grh_present) { + ib_pack(grh_table, ARRAY_SIZE(grh_table), + &header->grh, buf + len); + len += IB_GRH_BYTES; + } + + ib_pack(bth_table, ARRAY_SIZE(bth_table), + &header->bth, buf + len); + len += IB_BTH_BYTES; + + ib_pack(deth_table, ARRAY_SIZE(deth_table), + &header->deth, buf + len); + len += IB_DETH_BYTES; + + if (header->immediate_present) { + memcpy(buf + len, &header->immediate_data, sizeof header->immediate_data); + len += sizeof header->immediate_data; + } + + return len; +} +EXPORT_SYMBOL(ib_ud_header_pack); + +/** + * ib_ud_header_unpack - Unpack UD header struct from wire format + * @header:UD header struct + * @buf:Buffer to pack into + * + * ib_ud_header_pack() unpacks the UD header structure @header from wire + * format in the buffer @buf. 
+ */ +int ib_ud_header_unpack(u8 *buf, + struct ib_ud_header *header) +{ + ib_unpack(lrh_table, ARRAY_SIZE(lrh_table), + buf, &header->lrh); + buf += IB_LRH_BYTES; + + if (header->lrh.link_version != 0) { + printk(KERN_WARNING "Invalid LRH.link_version %d\n", + header->lrh.link_version); + return -EINVAL; + } + + switch (header->lrh.link_next_header) { + case IB_LNH_IBA_LOCAL: + header->grh_present = 0; + break; + + case IB_LNH_IBA_GLOBAL: + header->grh_present = 1; + ib_unpack(grh_table, ARRAY_SIZE(grh_table), + buf, &header->grh); + buf += IB_GRH_BYTES; + + if (header->grh.ip_version != 6) { + printk(KERN_WARNING "Invalid GRH.ip_version %d\n", + header->grh.ip_version); + return -EINVAL; + } + if (header->grh.next_header != 0x1b) { + printk(KERN_WARNING "Invalid GRH.next_header 0x%02x\n", + header->grh.next_header); + return -EINVAL; + } + break; + + default: + printk(KERN_WARNING "Invalid LRH.link_next_header %d\n", + header->lrh.link_next_header); + return -EINVAL; + } + + ib_unpack(bth_table, ARRAY_SIZE(bth_table), + buf, &header->bth); + buf += IB_BTH_BYTES; + + switch (header->bth.opcode) { + case IB_OPCODE_UD_SEND_ONLY: + header->immediate_present = 0; + break; + case IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE: + header->immediate_present = 1; + break; + default: + printk(KERN_WARNING "Invalid BTH.opcode 0x%02x\n", + header->bth.opcode); + return -EINVAL; + } + + if (header->bth.transport_header_version != 0) { + printk(KERN_WARNING "Invalid BTH.transport_header_version %d\n", + header->bth.transport_header_version); + return -EINVAL; + } + + ib_unpack(deth_table, ARRAY_SIZE(deth_table), + buf, &header->deth); + buf += IB_DETH_BYTES; + + if (header->immediate_present) + memcpy(&header->immediate_data, buf, sizeof header->immediate_data); + + return 0; +} +EXPORT_SYMBOL(ib_ud_header_unpack); diff --git a/branches/ConnectX/hw/mlx4/kernel/core/verbs.c b/branches/ConnectX/hw/mlx4/kernel/core/verbs.c new file mode 100644 index 00000000..df9acac8 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/core/verbs.c @@ -0,0 +1,336 @@ +/* + * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: verbs.c 1349 2004-12-16 21:09:43Z roland $ + */ + +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "device.tmh" +#endif + +#include "l2w.h" +#include "ib_verbs.h" + +// qp_state_table +static struct { + int valid; + enum ib_qp_attr_mask req_param[IB_QPT_RAW_ETY + 1]; + enum ib_qp_attr_mask opt_param[IB_QPT_RAW_ETY + 1]; +} qst[XIB_QPS_ERR + 1][XIB_QPS_ERR + 1]; + + +void init_qp_state_tbl() +{ + memset( qst, 0, sizeof(qst) ); + + // + // XIB_QPS_RESET + // + + // XIB_QPS_RESET + qst[XIB_QPS_RESET][XIB_QPS_RESET].valid = 1; + + // XIB_QPS_ERR + qst[XIB_QPS_RESET][XIB_QPS_ERR].valid = 1; + + // XIB_QPS_INIT + + qst[XIB_QPS_RESET][XIB_QPS_INIT].valid = 1; + qst[XIB_QPS_RESET][XIB_QPS_INIT].req_param[IB_QPT_UD] = + (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY); + qst[XIB_QPS_RESET][XIB_QPS_INIT].req_param[IB_QPT_UC] = + (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS); + qst[XIB_QPS_RESET][XIB_QPS_INIT].req_param[IB_QPT_RC] = + (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS); + qst[XIB_QPS_RESET][XIB_QPS_INIT].req_param[IB_QPT_SMI] = + (IB_QP_PKEY_INDEX | IB_QP_QKEY); + qst[XIB_QPS_RESET][XIB_QPS_INIT].req_param[IB_QPT_GSI] = + (IB_QP_PKEY_INDEX | IB_QP_QKEY); + + // + // XIB_QPS_INIT + // + + // XIB_QPS_RESET + qst[XIB_QPS_INIT][XIB_QPS_RESET].valid = 1; + + // XIB_QPS_ERR + qst[XIB_QPS_INIT][XIB_QPS_ERR].valid = 1; + + // XIB_QPS_INIT + qst[XIB_QPS_INIT][XIB_QPS_INIT].valid = 1; + + qst[XIB_QPS_INIT][XIB_QPS_INIT].opt_param[IB_QPT_UD] = + (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY); + qst[XIB_QPS_INIT][XIB_QPS_INIT].opt_param[IB_QPT_UC] = + (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS); + qst[XIB_QPS_INIT][XIB_QPS_INIT].opt_param[IB_QPT_RC] = + (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS); + qst[XIB_QPS_INIT][XIB_QPS_INIT].opt_param[IB_QPT_SMI] = + (IB_QP_PKEY_INDEX | IB_QP_QKEY); + qst[XIB_QPS_INIT][XIB_QPS_INIT].opt_param[IB_QPT_GSI] = + (IB_QP_PKEY_INDEX | IB_QP_QKEY); + + // XIB_QPS_RTR + qst[XIB_QPS_INIT][XIB_QPS_RTR].valid = 1; + + qst[XIB_QPS_INIT][XIB_QPS_RTR].req_param[IB_QPT_UC] = + (IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | IB_QP_RQ_PSN); + qst[XIB_QPS_INIT][XIB_QPS_RTR].req_param[IB_QPT_RC] = + (IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | + IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER); + + qst[XIB_QPS_INIT][XIB_QPS_RTR].opt_param[IB_QPT_UD] = + (IB_QP_PKEY_INDEX | IB_QP_QKEY); + qst[XIB_QPS_INIT][XIB_QPS_RTR].opt_param[IB_QPT_UC] = + (IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX); + qst[XIB_QPS_INIT][XIB_QPS_RTR].opt_param[IB_QPT_RC] = + (IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX); + qst[XIB_QPS_INIT][XIB_QPS_RTR].opt_param[IB_QPT_SMI] = + (IB_QP_PKEY_INDEX | IB_QP_QKEY); + qst[XIB_QPS_INIT][XIB_QPS_RTR].opt_param[IB_QPT_GSI] = + (IB_QP_PKEY_INDEX | IB_QP_QKEY); + + // + // XIB_QPS_RTR + // + + // XIB_QPS_RESET + qst[XIB_QPS_RTR][XIB_QPS_RESET].valid = 1; + + // XIB_QPS_ERR + qst[XIB_QPS_RTR][XIB_QPS_ERR].valid = 1; + + // XIB_QPS_RTS + qst[XIB_QPS_RTR][XIB_QPS_RTS].valid = 1; + + 
qst[XIB_QPS_RTR][XIB_QPS_RTS].req_param[IB_QPT_UD] = IB_QP_SQ_PSN; + qst[XIB_QPS_RTR][XIB_QPS_RTS].req_param[IB_QPT_UC] = IB_QP_SQ_PSN; + qst[XIB_QPS_RTR][XIB_QPS_RTS].req_param[IB_QPT_RC] = (IB_QP_TIMEOUT | + IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | IB_QP_SQ_PSN | IB_QP_MAX_QP_RD_ATOMIC); + qst[XIB_QPS_RTR][XIB_QPS_RTS].req_param[IB_QPT_SMI] = IB_QP_SQ_PSN; + qst[XIB_QPS_RTR][XIB_QPS_RTS].req_param[IB_QPT_GSI] = IB_QP_SQ_PSN; + + qst[XIB_QPS_RTR][XIB_QPS_RTS].opt_param[IB_QPT_UD] = + (IB_QP_CUR_STATE | IB_QP_QKEY); + qst[XIB_QPS_RTR][XIB_QPS_RTS].opt_param[IB_QPT_UC] = + (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | + IB_QP_PATH_MIG_STATE); + qst[XIB_QPS_RTR][XIB_QPS_RTS].opt_param[IB_QPT_RC] = + (IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | + IB_QP_SQ_PSN | IB_QP_MAX_QP_RD_ATOMIC); + qst[XIB_QPS_RTR][XIB_QPS_RTS].opt_param[IB_QPT_SMI] = + (IB_QP_CUR_STATE | IB_QP_QKEY); + qst[XIB_QPS_RTR][XIB_QPS_RTS].opt_param[IB_QPT_GSI] = + (IB_QP_CUR_STATE | IB_QP_QKEY); + + // + // XIB_QPS_RTS + // + + // XIB_QPS_RESET + qst[XIB_QPS_RTS][XIB_QPS_RESET].valid = 1; + + // XIB_QPS_ERR + qst[XIB_QPS_RTS][XIB_QPS_ERR].valid = 1; + + // XIB_QPS_RTS + qst[XIB_QPS_RTS][XIB_QPS_RTS].valid = 1; + + qst[XIB_QPS_RTS][XIB_QPS_RTS].opt_param[IB_QPT_UD] = + (IB_QP_CUR_STATE | IB_QP_QKEY); + qst[XIB_QPS_RTS][XIB_QPS_RTS].opt_param[IB_QPT_UC] = + (IB_QP_CUR_STATE | IB_QP_ACCESS_FLAGS | IB_QP_ALT_PATH | + IB_QP_PATH_MIG_STATE); + qst[XIB_QPS_RTS][XIB_QPS_RTS].opt_param[IB_QPT_RC] = + (IB_QP_CUR_STATE | IB_QP_ACCESS_FLAGS | IB_QP_ALT_PATH | + IB_QP_PATH_MIG_STATE | IB_QP_MIN_RNR_TIMER); + qst[XIB_QPS_RTS][XIB_QPS_RTS].opt_param[IB_QPT_SMI] = + (IB_QP_CUR_STATE | IB_QP_QKEY); + qst[XIB_QPS_RTS][XIB_QPS_RTS].opt_param[IB_QPT_GSI] = + (IB_QP_CUR_STATE | IB_QP_QKEY); + + // XIB_QPS_SQD + qst[XIB_QPS_RTS][XIB_QPS_SQD].valid = 1; + qst[XIB_QPS_RTS][XIB_QPS_SQD].opt_param[IB_QPT_UD] = IB_QP_EN_SQD_ASYNC_NOTIFY; + qst[XIB_QPS_RTS][XIB_QPS_SQD].opt_param[IB_QPT_UC] = IB_QP_EN_SQD_ASYNC_NOTIFY; + qst[XIB_QPS_RTS][XIB_QPS_SQD].opt_param[IB_QPT_RC] = IB_QP_EN_SQD_ASYNC_NOTIFY; + qst[XIB_QPS_RTS][XIB_QPS_SQD].opt_param[IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY; + qst[XIB_QPS_RTS][XIB_QPS_SQD].opt_param[IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY; + + // + // XIB_QPS_SQD + // + + // XIB_QPS_RESET + qst[XIB_QPS_SQD][XIB_QPS_RESET].valid = 1; + + // XIB_QPS_ERR + qst[XIB_QPS_SQD][XIB_QPS_ERR].valid = 1; + + // XIB_QPS_RTS + qst[XIB_QPS_SQD][XIB_QPS_RTS].valid = 1; + + qst[XIB_QPS_SQD][XIB_QPS_RTS].opt_param[IB_QPT_UD] = + (IB_QP_CUR_STATE | IB_QP_QKEY); + qst[XIB_QPS_SQD][XIB_QPS_RTS].opt_param[IB_QPT_UC] = + (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | + IB_QP_PATH_MIG_STATE); + qst[XIB_QPS_SQD][XIB_QPS_RTS].opt_param[IB_QPT_RC] = + (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | + IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE); + qst[XIB_QPS_SQD][XIB_QPS_RTS].opt_param[IB_QPT_SMI] = + (IB_QP_CUR_STATE | IB_QP_QKEY); + qst[XIB_QPS_SQD][XIB_QPS_RTS].opt_param[IB_QPT_GSI] = + (IB_QP_CUR_STATE | IB_QP_QKEY); + + // XIB_QPS_SQD + qst[XIB_QPS_SQD][XIB_QPS_SQD].valid = 1; + + qst[XIB_QPS_SQD][XIB_QPS_SQD].opt_param[IB_QPT_UD] = + (IB_QP_PKEY_INDEX | IB_QP_QKEY); + qst[XIB_QPS_SQD][XIB_QPS_SQD].opt_param[IB_QPT_UC] = + (IB_QP_AV | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX | IB_QP_PATH_MIG_STATE); + qst[XIB_QPS_SQD][XIB_QPS_SQD].opt_param[IB_QPT_RC] = + (IB_QP_PORT | IB_QP_AV | IB_QP_TIMEOUT | IB_QP_RETRY_CNT | + IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC | + IB_QP_ALT_PATH | 
IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | + IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE); + qst[XIB_QPS_SQD][XIB_QPS_SQD].opt_param[IB_QPT_SMI] = + (IB_QP_PKEY_INDEX | IB_QP_QKEY); + qst[XIB_QPS_SQD][XIB_QPS_SQD].opt_param[IB_QPT_GSI] = + (IB_QP_PKEY_INDEX | IB_QP_QKEY); + + // + // XIB_QPS_SQE + // + + // XIB_QPS_RESET + qst[XIB_QPS_SQE][XIB_QPS_RESET].valid = 1; + + // XIB_QPS_ERR + qst[XIB_QPS_SQE][XIB_QPS_ERR].valid = 1; + + // XIB_QPS_RTS + qst[XIB_QPS_SQE][XIB_QPS_RTS].valid = 1; + + qst[XIB_QPS_SQE][XIB_QPS_RTS].opt_param[IB_QPT_UD] = + (IB_QP_CUR_STATE | IB_QP_QKEY); + qst[XIB_QPS_SQE][XIB_QPS_RTS].opt_param[IB_QPT_UC] = + (IB_QP_CUR_STATE | IB_QP_ACCESS_FLAGS); + qst[XIB_QPS_SQE][XIB_QPS_RTS].opt_param[IB_QPT_SMI] = + (IB_QP_CUR_STATE | IB_QP_QKEY); + qst[XIB_QPS_SQE][XIB_QPS_RTS].opt_param[IB_QPT_GSI] = + (IB_QP_CUR_STATE | IB_QP_QKEY); + + + // + // XIB_QPS_ERR + // + + // XIB_QPS_RESET + qst[XIB_QPS_ERR][XIB_QPS_RESET].valid = 1; + + // XIB_QPS_ERR + qst[XIB_QPS_ERR][XIB_QPS_ERR].valid = 1; + +} + +int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, + enum ib_qp_type type, enum ib_qp_attr_mask mask) +{ + enum ib_qp_attr_mask req_param, opt_param; + + if (cur_state < 0 || cur_state > XIB_QPS_ERR || + next_state < 0 || next_state > XIB_QPS_ERR) + return 0; + + if (mask & IB_QP_CUR_STATE && + cur_state != XIB_QPS_RTR && cur_state != XIB_QPS_RTS && + cur_state != XIB_QPS_SQD && cur_state != XIB_QPS_SQE) + return 0; + + if (!qst[cur_state][next_state].valid) + return 0; + + req_param = qst[cur_state][next_state].req_param[type]; + opt_param = qst[cur_state][next_state].opt_param[type]; + + if ((mask & req_param) != req_param) + return 0; + + if (mask & ~(req_param | opt_param | IB_QP_STATE)) + return 0; + + return 1; +} +EXPORT_SYMBOL(ib_modify_qp_is_ok); + +struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +{ + struct ib_ah *ah; + + ah = pd->device->create_ah(pd, ah_attr); + + if (!IS_ERR(ah)) { + ah->device = pd->device; + ah->pd = pd; + ah->p_uctx = NULL; + atomic_inc(&pd->usecnt); + } + + return ah; +} +EXPORT_SYMBOL(ib_create_ah); + +int ib_destroy_ah(struct ib_ah *ah) +{ + int ret; + struct ib_pd *pd = ah->pd; + + ret = ah->device->destroy_ah(ah); + if (!ret) + atomic_dec(&pd->usecnt); + + return ret; +} +EXPORT_SYMBOL(ib_destroy_ah); + diff --git a/branches/ConnectX/hw/mlx4/kernel/dirs b/branches/ConnectX/hw/mlx4/kernel/dirs new file mode 100644 index 00000000..8c2beb30 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/dirs @@ -0,0 +1,6 @@ +DIRS=\ + core \ + net \ + ib \ + hca \ + bus diff --git a/branches/ConnectX/hw/mlx4/kernel/hca/Makefile b/branches/ConnectX/hw/mlx4/kernel/hca/Makefile new file mode 100644 index 00000000..ddc9da43 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/hca/Makefile @@ -0,0 +1,6 @@ +# DO NOT EDIT THIS FILE!!! Edit .\sources. if you want to add a new source +# file to this component. 
This file merely indirects to the real make file +# that is shared by all the driver components of the Windows NT DDK +# + +!INCLUDE ..\..\..\..\inc\openib.def diff --git a/branches/ConnectX/hw/mlx4/kernel/hca/SOURCES b/branches/ConnectX/hw/mlx4/kernel/hca/SOURCES new file mode 100644 index 00000000..b9eae97b --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/hca/SOURCES @@ -0,0 +1,56 @@ +TARGETNAME=mlx4_hca +TARGETPATH=..\..\..\..\bin\kernel\obj$(BUILD_ALT_DIR) +TARGETTYPE=DRIVER + +!if $(FREEBUILD) +#ENABLE_EVENT_TRACING=1 +!else +#ENABLE_EVENT_TRACING=1 +!endif + +SOURCES= \ + hca.rc \ + av.c \ + ca.c \ + cq.c \ + data.c \ + direct.c \ + drv.c \ + fw.c \ + mcast.c \ + mr.c \ + pd.c \ + qp.c \ + srq.c \ + verbs.c \ + vp.c \ + wmi.c \ + +INCLUDES=..;..\inc;..\..\inc;..\..\..\..\inc;..\..\..\..\inc\kernel; + +PRECOMPILED_INCLUDE=precomp.h + +NTTARGETFILE0=mofcomp + +KMDF_VERSION=1 + +C_DEFINES=$(C_DEFINES) -DDRIVER -DDEPRECATE_DDK_FUNCTIONS -D__LITTLE_ENDIAN -DUSE_WDM_FRAMEWORK + +TARGETLIBS= \ + $(TARGETPATH)\*\complib.lib \ + $(DDK_LIB_PATH)\ntstrsafe.lib \ + +#LINKER_FLAGS=/MAP + +!IFDEF ENABLE_EVENT_TRACING + +C_DEFINES = $(C_DEFINES) -DEVENT_TRACING +RUN_WPP= $(SOURCES) -km -ext: .c .h .C .H \ + -scan:debug.h \ + -func:HCA_PRINT(LEVEL,FLAGS,(MSG,...)) +!ENDIF + +# -func:HCA_PRINT_EV(LEVEL,FLAGS,(MSG,...)) \ + +MSC_OPTIMIZATION=/Oi +MSC_WARNING_LEVEL= /W4 diff --git a/branches/ConnectX/hw/mlx4/kernel/hca/av.c b/branches/ConnectX/hw/mlx4/kernel/hca/av.c new file mode 100644 index 00000000..ccd8393f --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/hca/av.c @@ -0,0 +1,233 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: al.c 1611 2006-08-20 14:48:55Z sleybo $ + */ + +#include "precomp.h" + +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "av.tmh" +#endif + +/* +* Address Vector Management Verbs +*/ +ib_api_status_t +mlnx_create_av ( + IN const ib_pd_handle_t h_pd, + IN const ib_av_attr_t *p_ib_av_attr, + OUT ib_av_handle_t *ph_av, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + int err = 0; + ib_api_status_t status = IB_SUCCESS; + struct ib_pd *p_ib_pd = (struct ib_pd *)h_pd; + struct ib_ah *p_ib_ah; + struct ib_ah_attr ah_attr; + struct ib_ucontext *p_uctx = NULL; + + HCA_ENTER(HCA_DBG_AV); + + if (p_umv_buf && p_umv_buf->command) { + status = IB_UNSUPPORTED; + goto err_nosys; + } + + // fill parameters + err = to_av( p_ib_pd->device, p_ib_av_attr, &ah_attr ); + if (err) { + status = errno_to_iberr(err); + goto err_to_av; + } + + // create AV + p_ib_ah = p_ib_pd->device->create_ah(p_ib_pd, &ah_attr); + if (IS_ERR(p_ib_ah)) { + err = PTR_ERR(p_ib_ah); + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_AV ,("create_ah failed (%d)\n", err)); + goto err_create_ah; + } + + // fill results + p_ib_ah->device = p_ib_pd->device; + p_ib_ah->pd = p_ib_pd; + p_ib_ah->p_uctx = p_uctx; + atomic_inc(&p_ib_pd->usecnt); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_AV ,("PD%d use cnt %d, pd_handle %p, ctx %p \n", + ((struct mlx4_ib_pd*)p_ib_pd)->pdn, p_ib_pd->usecnt, p_ib_pd, p_ib_pd->p_uctx)); + + // return the result + if (ph_av) *ph_av = (ib_av_handle_t)p_ib_ah; + + status = IB_SUCCESS; + +err_create_ah: +err_to_av: +err_nosys: + if (p_umv_buf && p_umv_buf->command) + p_umv_buf->status = status; + if (status != IB_SUCCESS) + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_AV, + ("completes with ERROR status %x\n", status)); + HCA_EXIT(HCA_DBG_AV); + return status; +} + +ib_api_status_t +mlnx_query_av ( + IN const ib_av_handle_t h_av, + OUT ib_av_attr_t *p_ib_av_attr, + OUT ib_pd_handle_t *ph_pd, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct ib_ah *p_ib_ah = (struct ib_ah *)h_av; + struct ib_ah_attr ah_attr; + UNUSED_PARAM(p_umv_buf); + + HCA_ENTER(HCA_DBG_AV); + + if (p_umv_buf && p_umv_buf->command) { + status = IB_UNSUPPORTED; + goto err_nosys; + } + + // query AV + err = p_ib_ah->device->query_ah(p_ib_ah, &ah_attr); + if (err) { + status = errno_to_iberr(err); + goto err_query_ah; + } + + // results + if (ph_pd) + *ph_pd = (ib_pd_handle_t)p_ib_ah->pd; + err = from_av( p_ib_ah->device, NULL, &ah_attr, p_ib_av_attr ); + status = errno_to_iberr(err); + +err_query_ah: +err_nosys: + if (p_umv_buf && p_umv_buf->command) + p_umv_buf->status = status; + if (status != IB_SUCCESS) + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_AV, + ("completes with ERROR status %x\n", status)); + HCA_EXIT(HCA_DBG_AV); + return status; +} + +ib_api_status_t +mlnx_modify_av ( + IN const ib_av_handle_t h_av, + IN const ib_av_attr_t *p_ib_av_attr, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + int err; + struct ib_ah_attr ah_attr; + ib_api_status_t status = IB_SUCCESS; + struct ib_ah *p_ib_ah = (struct ib_ah *)h_av; + UNUSED_PARAM(p_umv_buf); + + HCA_ENTER(HCA_DBG_AV); + + if (p_umv_buf && p_umv_buf->command) { + status = IB_UNSUPPORTED; + goto err_nosys; + } + + // fill parameters + err = to_av( p_ib_ah->device, p_ib_av_attr, &ah_attr ); + if (err) { + status = errno_to_iberr(err); + goto err_to_av; + } + + // modify AV + if (p_ib_ah->device->modify_ah) + err = p_ib_ah->device->modify_ah(p_ib_ah, &ah_attr); + else + err = -ENOSYS; + status = errno_to_iberr(err); + +err_to_av: +err_nosys: + if 
(p_umv_buf && p_umv_buf->command) + p_umv_buf->status = status; + if (status != IB_SUCCESS) + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_AV, + ("completes with ERROR status %x\n", status)); + HCA_EXIT(HCA_DBG_AV); + return status; +} + +ib_api_status_t +mlnx_destroy_av ( + IN const ib_av_handle_t h_av) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct ib_ah *p_ib_ah = (struct ib_ah *)h_av; + struct ib_pd *pd = p_ib_ah->pd; + + HCA_ENTER(HCA_DBG_AV); + + // destroy AV + err = p_ib_ah->device->destroy_ah(p_ib_ah); + if (!err) { + atomic_dec(&pd->usecnt); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_MEMORY ,("pdn %d, usecnt %d, pd_handle %p, ctx %p \n", + ((struct mlx4_ib_pd*)pd)->pdn, pd->usecnt, pd, pd->p_uctx)); + } + status = errno_to_iberr(err); + + if (status != IB_SUCCESS) + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_AV, + ("completes with ERROR status %x\n", status)); + HCA_EXIT(HCA_DBG_AV); + return status; +} + + + + +void +mlnx_av_if( + IN OUT ci_interface_t *p_interface ) +{ + p_interface->create_av = mlnx_create_av; + p_interface->query_av = mlnx_query_av; + p_interface->modify_av = mlnx_modify_av; + p_interface->destroy_av = mlnx_destroy_av; +} + diff --git a/branches/ConnectX/hw/mlx4/kernel/hca/ca.c b/branches/ConnectX/hw/mlx4/kernel/hca/ca.c new file mode 100644 index 00000000..2a47fae8 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/hca/ca.c @@ -0,0 +1,422 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: al.c 1611 2006-08-20 14:48:55Z sleybo $ + */ + +#include "precomp.h" + +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "ca.tmh" +#endif + +ib_api_status_t +mlnx_open_ca ( + IN const ib_net64_t ca_guid, // IN const char * ca_name, + IN const ci_completion_cb_t pfn_completion_cb, + IN const ci_async_event_cb_t pfn_async_event_cb, + IN const void*const ca_context, + OUT ib_ca_handle_t *ph_ca) +{ + mlnx_hca_t *p_hca; + ib_api_status_t status = IB_NOT_FOUND; + struct ib_device *p_ibdev; + + HCA_ENTER(HCA_DBG_SHIM); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM, + ("context 0x%p\n", ca_context)); + + // find CA object + p_hca = mlnx_hca_from_guid( ca_guid ); + if( !p_hca ) { + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM, + ("completes with ERROR status IB_NOT_FOUND\n")); + } + HCA_EXIT(HCA_DBG_SHIM); + return IB_NOT_FOUND; + } + + p_ibdev = hca2ibdev(p_hca); + + if (hca_is_livefish(hca2fdo(p_hca))) + goto done; + + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM, + ("context 0x%p\n", ca_context)); + status = mlnx_set_cb(p_hca, + pfn_completion_cb, + pfn_async_event_cb, + ca_context); + if (IB_SUCCESS != status) { + goto err_set_cb; + } + + + //TODO: do we need something for kernel users ? + + // Return pointer to HCA object +done: + if (ph_ca) *ph_ca = (ib_ca_handle_t)p_hca; + status = IB_SUCCESS; + +//err_mad_cache: +err_set_cb: + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SHIM, + ("completes with ERROR status %x\n", status)); + } + HCA_EXIT(HCA_DBG_SHIM); + return status; +} + +ib_api_status_t +mlnx_query_ca ( + IN const ib_ca_handle_t h_ca, + OUT ib_ca_attr_t *p_ca_attr, + IN OUT uint32_t *p_byte_count, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + int i; + int err; + ib_api_status_t status; + uint32_t size, required_size; + int port_num, num_ports; + uint32_t num_gids, num_pkeys; + uint32_t num_page_sizes = 1; // TBD: what is actually supported + uint8_t *last_p; + struct ib_device_attr props; + struct ib_port_attr *hca_ports = NULL; + mlnx_hca_t *p_hca = (mlnx_hca_t *)h_ca; + struct ib_device *p_ibdev = hca2ibdev(p_hca); + + + HCA_ENTER(HCA_DBG_SHIM); + + // sanity checks + if( p_umv_buf && p_umv_buf->command ) { + HCA_PRINT (TRACE_LEVEL_ERROR, HCA_DBG_SHIM ,("User mode is not supported yet\n")); + p_umv_buf->status = status = IB_UNSUPPORTED; + goto err_user_unsupported; + } + + if( !cl_is_blockable() ) { + status = IB_UNSUPPORTED; + goto err_unsupported; + } + + if (NULL == p_byte_count) { + status = IB_INVALID_PARAMETER; + goto err_byte_count; + } + + // query the device + if ( hca_is_livefish(hca2fdo(p_hca)) ) { + struct pci_dev *pdev = hca2pdev(p_hca); + props.max_pd = 1; + props.vendor_id = pdev->ven_id; + props.vendor_part_id = pdev->dev_id; + err = 0; + } + else + err = p_ibdev->query_device(p_ibdev, &props); + if (err) { + HCA_PRINT (TRACE_LEVEL_ERROR, HCA_DBG_SHIM, + ("ib_query_device failed (%d)\n",err)); + status = errno_to_iberr(err); + goto err_query_device; + } + + // alocate arrary for port properties + num_ports = p_ibdev->phys_port_cnt; /* Number of physical ports of the HCA */ + if ( num_ports ) + if (NULL == (hca_ports = cl_zalloc( num_ports * sizeof *hca_ports))) { + HCA_PRINT (TRACE_LEVEL_ERROR, HCA_DBG_SHIM, ("Failed to cl_zalloc ports array\n")); + status = IB_INSUFFICIENT_MEMORY; + goto err_alloc_ports; + } + + // start calculation of ib_ca_attr_t full size + num_gids = 0; + num_pkeys = 0; + required_size = PTR_ALIGN(sizeof(ib_ca_attr_t)) + + PTR_ALIGN(sizeof(uint32_t) 
* num_page_sizes) + + PTR_ALIGN(sizeof(ib_port_attr_t) * num_ports)+ + PTR_ALIGN(MLX4_BOARD_ID_LEN)+ + PTR_ALIGN(sizeof(uplink_info_t)); /* uplink info */ + + // get port properties + for (port_num = 0; port_num <= (end_port(p_ibdev) - start_port(p_ibdev)); ++port_num) { + // request + err = p_ibdev->query_port(p_ibdev, (u8)(port_num + start_port(p_ibdev)), &hca_ports[port_num]); + if (err) { + HCA_PRINT (TRACE_LEVEL_ERROR, HCA_DBG_SHIM, ("ib_query_port failed(%d) for port %d\n",err, port_num)); + status = errno_to_iberr(err); + goto err_query_port; + } + + // calculate GID table size + num_gids = hca_ports[port_num].gid_tbl_len; + size = PTR_ALIGN(sizeof(ib_gid_t) * num_gids); + required_size += size; + + // calculate pkeys table size + num_pkeys = hca_ports[port_num].pkey_tbl_len; + size = PTR_ALIGN(sizeof(uint16_t) * num_pkeys); + required_size += size; + } + + // resource sufficience check + if (NULL == p_ca_attr || *p_byte_count < required_size) { + *p_byte_count = required_size; + status = IB_INSUFFICIENT_MEMORY; + if ( p_ca_attr != NULL) { + HCA_PRINT (TRACE_LEVEL_ERROR,HCA_DBG_SHIM, + ("Failed *p_byte_count (%d) < required_size (%d)\n", *p_byte_count, required_size )); + } + goto err_insuff_mem; + } + + // Space is sufficient - setup table pointers + last_p = (uint8_t*)p_ca_attr; + last_p += PTR_ALIGN(sizeof(*p_ca_attr)); + + p_ca_attr->p_page_size = (uint32_t*)last_p; + last_p += PTR_ALIGN(num_page_sizes * sizeof(uint32_t)); + + p_ca_attr->p_port_attr = (ib_port_attr_t *)last_p; + last_p += PTR_ALIGN(num_ports * sizeof(ib_port_attr_t)); + + for (port_num = 0; port_num < num_ports; port_num++) { + p_ca_attr->p_port_attr[port_num].p_gid_table = (ib_gid_t *)last_p; + size = PTR_ALIGN(sizeof(ib_gid_t) * hca_ports[port_num].gid_tbl_len); + last_p += size; + + p_ca_attr->p_port_attr[port_num].p_pkey_table = (uint16_t *)last_p; + size = PTR_ALIGN(sizeof(uint16_t) * hca_ports[port_num].pkey_tbl_len); + last_p += size; + } + + //copy vendor specific data + cl_memcpy(last_p,hca2mdev(p_hca)->board_id, MLX4_BOARD_ID_LEN); + last_p += PTR_ALIGN(MLX4_BOARD_ID_LEN); + *(uplink_info_t*)last_p = hca2pdev(p_hca)->uplink_info; + last_p += PTR_ALIGN(sizeof(uplink_info_t)); /* uplink info */ + + // Separate the loops to ensure that table pointers are always setup + for (port_num = 0; port_num < num_ports; port_num++) { + + // get pkeys, using cache + for (i=0; i < hca_ports[port_num].pkey_tbl_len; ++i) { + err = p_ibdev->x.get_cached_pkey( p_ibdev, (u8)(port_num + start_port(p_ibdev)), i, + &p_ca_attr->p_port_attr[port_num].p_pkey_table[i] ); + if (err) { + status = errno_to_iberr(err); + HCA_PRINT (TRACE_LEVEL_ERROR,HCA_DBG_SHIM, + ("ib_get_cached_pkey failed (%d) for port_num %d, index %d\n", + err, port_num + start_port(p_ibdev), i)); + goto err_get_pkey; + } + } + + // get gids, using cache + for (i=0; i < hca_ports[port_num].gid_tbl_len; ++i) { + union ib_gid * __ptr64 gid = (union ib_gid *)&p_ca_attr->p_port_attr[port_num].p_gid_table[i]; + err = p_ibdev->x.get_cached_gid( p_ibdev, (u8)(port_num + start_port(p_ibdev)), i, (union ib_gid *)gid ); + //TODO: do we need to convert gids to little endian + if (err) { + status = errno_to_iberr(err); + HCA_PRINT (TRACE_LEVEL_ERROR, HCA_DBG_SHIM, + ("ib_get_cached_gid failed (%d) for port_num %d, index %d\n", + err, port_num + start_port(p_ibdev), i)); + goto err_get_gid; + } + } + + HCA_PRINT(TRACE_LEVEL_VERBOSE, HCA_DBG_SHIM,("port %d gid0:\n", port_num)); + HCA_PRINT(TRACE_LEVEL_VERBOSE, HCA_DBG_SHIM, + (" 0x%x%x%x%x%x%x%x%x-0x%x%x%x%x%x%x%x%x\n", + 
p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[0], + p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[1], + p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[2], + p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[3], + p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[4], + p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[5], + p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[6], + p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[7], + p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[8], + p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[9], + p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[10], + p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[11], + p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[12], + p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[13], + p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[14], + p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[15])); + } + + // set result size + p_ca_attr->size = required_size; + CL_ASSERT( required_size == (((uintn_t)last_p) - ((uintn_t)p_ca_attr)) ); + HCA_PRINT(TRACE_LEVEL_VERBOSE, HCA_DBG_SHIM , ("Space required %d used %d\n", + required_size, (int)((uintn_t)last_p - (uintn_t)p_ca_attr) )); + + // !!! GID/PKEY tables must be queried before this call !!! + from_hca_cap(p_ibdev, &props, hca_ports, p_ca_attr); + + status = IB_SUCCESS; + +err_get_gid: +err_get_pkey: +err_insuff_mem: +err_query_port: + if (hca_ports) + cl_free(hca_ports); +err_alloc_ports: +err_query_device: +err_byte_count: +err_unsupported: +err_user_unsupported: + if( status != IB_INSUFFICIENT_MEMORY && status != IB_SUCCESS ) + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_SHIM, + ("completes with ERROR status %x\n", status)); + HCA_EXIT(HCA_DBG_SHIM); + return status; +} + +ib_api_status_t +mlnx_modify_ca ( + IN const ib_ca_handle_t h_ca, + IN const uint8_t port_num, + IN const ib_ca_mod_t modca_cmd, + IN const ib_port_attr_mod_t *p_port_attr) +{ +#define SET_CAP_MOD(al_mask, al_fld, ib) \ + if (modca_cmd & al_mask) { \ + if (p_port_attr->cap.##al_fld) \ + props.set_port_cap_mask |= ib; \ + else \ + props.clr_port_cap_mask |= ib; \ + } + + ib_api_status_t status; + int err; + struct ib_port_modify props; + int port_modify_mask = 0; + mlnx_hca_t *p_hca = (mlnx_hca_t *)h_ca; + struct ib_device *p_ibdev = hca2ibdev(p_hca); + + HCA_ENTER(HCA_DBG_SHIM); + + //sanity check + if( !cl_is_blockable() ) { + status = IB_UNSUPPORTED; + goto err_unsupported; + } + + if (port_num < start_port(p_ibdev) || port_num > end_port(p_ibdev)) { + status = IB_INVALID_PORT; + goto err_port; + } + + // prepare parameters + RtlZeroMemory(&props, sizeof(props)); + SET_CAP_MOD(IB_CA_MOD_IS_SM, sm, IB_PORT_SM); + SET_CAP_MOD(IB_CA_MOD_IS_SNMP_SUPPORTED, snmp, IB_PORT_SNMP_TUNNEL_SUP); + SET_CAP_MOD(IB_CA_MOD_IS_DEV_MGMT_SUPPORTED, dev_mgmt, IB_PORT_DEVICE_MGMT_SUP); + SET_CAP_MOD(IB_CA_MOD_IS_VEND_SUPPORTED, vend, IB_PORT_VENDOR_CLASS_SUP); + if ((modca_cmd & IB_CA_MOD_QKEY_CTR) && (p_port_attr->qkey_ctr == 0)) + port_modify_mask |= IB_PORT_RESET_QKEY_CNTR; + + // modify port + err = p_ibdev->modify_port(p_ibdev, port_num, port_modify_mask, &props ); + if (err) { + status = errno_to_iberr(err); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("ib_modify_port failed (%d) \n",err)); + goto err_modify_port; + } + + status = IB_SUCCESS; + +err_modify_port: +err_port: +err_unsupported: + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_SHIM, + ("completes with ERROR status %x\n", status)); + } + HCA_EXIT(HCA_DBG_SHIM); + return status; +} + 
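+/*
+ * Usage sketch (a hypothetical caller, not code from this change): mlnx_query_ca()
+ * follows a two-call size-query pattern - a first call with a NULL attribute
+ * buffer fails with IB_INSUFFICIENT_MEMORY and reports the required size in
+ * *p_byte_count, after which the caller allocates the buffer and retries:
+ *
+ *     uint32_t        size   = 0;
+ *     ib_ca_attr_t   *p_attr = NULL;
+ *     ib_api_status_t status;
+ *
+ *     status = mlnx_query_ca( h_ca, NULL, &size, NULL );
+ *     if (status == IB_INSUFFICIENT_MEMORY && (p_attr = cl_zalloc( size )) != NULL)
+ *         status = mlnx_query_ca( h_ca, p_attr, &size, NULL );
+ */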
+ib_api_status_t +mlnx_close_ca ( + IN ib_ca_handle_t h_ca) +{ + mlnx_hca_t *p_hca = (mlnx_hca_t *)h_ca; + HCA_ENTER(HCA_DBG_SHIM); + + + if (hca_is_livefish(hca2fdo(p_hca))) + goto done; + + mlnx_reset_cb(p_hca); + +done: + HCA_EXIT(HCA_DBG_SHIM); + + return IB_SUCCESS; +} + + + +void +mlnx_ca_if( + IN OUT ci_interface_t *p_interface ) +{ + p_interface->open_ca = mlnx_open_ca; + p_interface->modify_ca = mlnx_modify_ca; + p_interface->query_ca = mlnx_query_ca; + p_interface->close_ca = mlnx_close_ca; +} + +void +mlnx_ca_if_livefish( + IN OUT ci_interface_t *p_interface ) +{ + p_interface->open_ca = mlnx_open_ca; + p_interface->query_ca = mlnx_query_ca; + p_interface->close_ca = mlnx_close_ca; +} + diff --git a/branches/ConnectX/hw/mlx4/kernel/hca/cq.c b/branches/ConnectX/hw/mlx4/kernel/hca/cq.c new file mode 100644 index 00000000..de5cde78 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/hca/cq.c @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: al.c 1611 2006-08-20 14:48:55Z sleybo $ + */ + +#include "precomp.h" + +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "cq.tmh" +#endif + +ib_api_status_t +mlnx_create_cq ( + IN const ib_ca_handle_t h_ca, + IN const void *cq_context, + IN OUT uint32_t *p_size, + OUT ib_cq_handle_t *ph_cq, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + int err; + ib_api_status_t status; + struct ib_cq *p_ib_cq; + mlnx_hca_t *p_hca; + struct ib_device *p_ibdev; + struct ib_ucontext *p_uctx; + + HCA_ENTER(HCA_DBG_CQ); + + if( p_umv_buf && p_umv_buf->command) { + + p_uctx = (struct ib_ucontext *)h_ca; + p_ibdev = p_uctx->device; + p_hca = ibdev2hca(p_ibdev); + + // sanity checks + if (p_umv_buf->input_size < sizeof(struct ibv_create_cq) || + p_umv_buf->output_size < sizeof(struct ibv_create_cq_resp) || + !p_umv_buf->p_inout_buf) { + status = IB_INVALID_PARAMETER; + goto err_inval_params; + } + } + else { + p_uctx = NULL; + p_hca = (mlnx_hca_t *)h_ca; + p_ibdev = hca2ibdev(p_hca); + } + + /* sanity check */ + if (!*p_size || *p_size > (uint32_t)hca2mdev(p_hca)->caps.max_cqes) { + status = IB_INVALID_CQ_SIZE; + goto err_cqe; + } + + // allocate cq + p_ib_cq = ibv_create_cq(p_ibdev, + cq_comp_handler, cq_event_handler, + p_hca, *p_size, p_uctx, p_umv_buf ); + if (IS_ERR(p_ib_cq)) { + err = PTR_ERR(p_ib_cq); + HCA_PRINT (TRACE_LEVEL_ERROR ,HCA_DBG_CQ, ("ibv_create_cq failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_create_cq; + } + + // fill the object + p_ib_cq->x.ctx = (void*)cq_context; + + // return the result + *p_size = p_ib_cq->cqe; + + if (ph_cq) *ph_cq = (ib_cq_handle_t)p_ib_cq; + + status = IB_SUCCESS; + +err_create_cq: +err_inval_params: +err_cqe: + if (p_umv_buf && p_umv_buf->command) + p_umv_buf->status = status; + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_CQ, + ("completes with ERROR status %x\n", status)); + } + HCA_EXIT(HCA_DBG_CQ); + return status; +} + +ib_api_status_t +mlnx_resize_cq ( + IN const ib_cq_handle_t h_cq, + IN OUT uint32_t *p_size, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct ib_cq *p_ib_cq = (struct ib_cq *)h_cq; + struct ib_device *p_ibdev = p_ib_cq->device; + + UNUSED_PARAM(p_umv_buf); + + HCA_ENTER(HCA_DBG_CQ); + + if (p_ibdev->resize_cq) { + err = p_ibdev->resize_cq(p_ib_cq, *p_size, NULL); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM, + ("ib_resize_cq failed (%d)\n", err)); + status = errno_to_iberr(err); + } + } + else + status = IB_UNSUPPORTED; + + HCA_EXIT(HCA_DBG_CQ); + return status; +} + +ib_api_status_t +mlnx_query_cq ( + IN const ib_cq_handle_t h_cq, + OUT uint32_t *p_size, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + UNREFERENCED_PARAMETER(h_cq); + UNREFERENCED_PARAMETER(p_size); + if (p_umv_buf && p_umv_buf->command) { + p_umv_buf->status = IB_UNSUPPORTED; + } + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_CQ,("mlnx_query_cq not supported\n")); + return IB_UNSUPPORTED; +} + +ib_api_status_t +mlnx_destroy_cq ( + IN const ib_cq_handle_t h_cq) +{ + + ib_api_status_t status; + int err; + struct ib_cq *p_ib_cq = (struct ib_cq *)h_cq; + + HCA_ENTER( HCA_DBG_QP); + + HCA_PRINT(TRACE_LEVEL_INFORMATION,HCA_DBG_CQ, + ("cqn %#x, pcs %p\n", ((struct mlx4_ib_cq*)p_ib_cq)->mcq.cqn, PsGetCurrentProcess()) ); + + // destroy CQ + err = ib_destroy_cq( p_ib_cq ); + if (err) { + HCA_PRINT (TRACE_LEVEL_ERROR ,HCA_DBG_SHIM, + ("ibv_destroy_cq failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_destroy_cq; + } + + status = IB_SUCCESS; + 
+err_destroy_cq: + if (status != IB_SUCCESS) + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_CQ, + ("completes with ERROR status %x\n", status)); + HCA_EXIT(HCA_DBG_CQ); + return status; +} + + + + +void +mlnx_cq_if( + IN OUT ci_interface_t *p_interface ) +{ + p_interface->create_cq = mlnx_create_cq; + p_interface->resize_cq = mlnx_resize_cq; + p_interface->query_cq = mlnx_query_cq; + p_interface->destroy_cq = mlnx_destroy_cq; +} + diff --git a/branches/ConnectX/hw/mlx4/kernel/hca/data.c b/branches/ConnectX/hw/mlx4/kernel/hca/data.c new file mode 100644 index 00000000..0447626c --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/hca/data.c @@ -0,0 +1,1039 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 2004-2005 Mellanox Technologies, Inc. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: data.c 1944 2007-02-12 16:16:00Z sleybo $ + */ + + +#include "precomp.h" + +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "data.tmh" +#endif + +static cl_spinlock_t hca_lock; + + + +uint32_t g_mlnx_dpc2thread = 0; + + +cl_qlist_t mlnx_hca_list; + +///////////////////////////////////////////////////////// +// ### HCA +///////////////////////////////////////////////////////// +void +mlnx_hca_insert( + IN mlnx_hca_t *p_hca ) +{ + cl_spinlock_acquire( &hca_lock ); + cl_qlist_insert_tail( &mlnx_hca_list, &p_hca->list_item ); + cl_spinlock_release( &hca_lock ); +} + +void +mlnx_hca_remove( + IN mlnx_hca_t *p_hca ) +{ + cl_spinlock_acquire( &hca_lock ); + cl_qlist_remove_item( &mlnx_hca_list, &p_hca->list_item ); + cl_spinlock_release( &hca_lock ); +} + +mlnx_hca_t* +mlnx_hca_from_guid( + IN ib_net64_t guid ) +{ + cl_list_item_t *p_item; + mlnx_hca_t *p_hca = NULL; + + cl_spinlock_acquire( &hca_lock ); + p_item = cl_qlist_head( &mlnx_hca_list ); + while( p_item != cl_qlist_end( &mlnx_hca_list ) ) + { + p_hca = PARENT_STRUCT( p_item, mlnx_hca_t, list_item ); + if( p_hca->guid == guid ) + break; + p_item = cl_qlist_next( p_item ); + p_hca = NULL; + } + cl_spinlock_release( &hca_lock ); + return p_hca; +} + +///////////////////////////////////////////////////////// +// ### HCA +///////////////////////////////////////////////////////// +cl_status_t +mlnx_hcas_init( void ) +{ + cl_qlist_init( &mlnx_hca_list ); + return cl_spinlock_init( &hca_lock ); +} + + +///////////////////////////////////////////////////////// +///////////////////////////////////////////////////////// +ib_api_status_t +mlnx_set_cb( + IN mlnx_hca_t * p_hca, + IN ci_completion_cb_t comp_cb_p, + IN ci_async_event_cb_t async_cb_p, + IN const void* const ib_context) +{ + cl_status_t cl_status; + + // Setup the callbacks + if (!p_hca->async_proc_mgr_p) + { + p_hca->async_proc_mgr_p = cl_malloc( sizeof( cl_async_proc_t ) ); + if( !p_hca->async_proc_mgr_p ) + { + return IB_INSUFFICIENT_MEMORY; + } + cl_async_proc_construct( p_hca->async_proc_mgr_p ); + cl_status = cl_async_proc_init( p_hca->async_proc_mgr_p, MLNX_NUM_CB_THR, "CBthread" ); + if( cl_status != CL_SUCCESS ) + { + cl_async_proc_destroy( p_hca->async_proc_mgr_p ); + cl_free(p_hca->async_proc_mgr_p); + p_hca->async_proc_mgr_p = NULL; + return IB_INSUFFICIENT_RESOURCES; + } + } + + p_hca->comp_cb_p = comp_cb_p; + p_hca->async_cb_p = async_cb_p; + p_hca->ca_context = ib_context; // This is the context our CB forwards to IBAL + return IB_SUCCESS; +} + +///////////////////////////////////////////////////////// +///////////////////////////////////////////////////////// +void +mlnx_reset_cb( + IN mlnx_hca_t * p_hca) +{ + cl_async_proc_t *p_async_proc; + + + cl_spinlock_acquire( &hca_lock ); + + p_async_proc = p_hca->async_proc_mgr_p; + p_hca->async_proc_mgr_p = NULL; + + p_hca->comp_cb_p = NULL; + p_hca->async_cb_p = NULL; + p_hca->ca_context = NULL; + p_hca->cl_device_h = NULL; + + cl_spinlock_release( &hca_lock ); + + if( p_async_proc ) + { + cl_async_proc_destroy( p_async_proc ); + cl_free( p_async_proc ); + } + +} + +///////////////////////////////////////////////////////// +void +from_port_cap( + IN u32 mthca_port_cap, + OUT ib_port_cap_t *ibal_port_cap_p) +{ +#define SET_CAP(flag,cap) if (mthca_port_cap & flag) ibal_port_cap_p->cap = TRUE + + SET_CAP(IB_PORT_CM_SUP,cm); + SET_CAP(IB_PORT_SNMP_TUNNEL_SUP,snmp); + SET_CAP(IB_PORT_DEVICE_MGMT_SUP,dev_mgmt); + SET_CAP(IB_PORT_VENDOR_CLASS_SUP,vend); + 
SET_CAP(IB_PORT_SM_DISABLED,sm_disable); + SET_CAP(IB_PORT_SM,sm); + SET_CAP(IB_PORT_NOTICE_SUP,notice); + SET_CAP(IB_PORT_TRAP_SUP,trap); + SET_CAP(IB_PORT_AUTO_MIGR_SUP,apm); + SET_CAP(IB_PORT_SL_MAP_SUP,slmap); + SET_CAP(IB_PORT_LED_INFO_SUP,ledinfo); + SET_CAP(IB_PORT_CAP_MASK_NOTICE_SUP,capm_notice); + SET_CAP(IB_PORT_CLIENT_REG_SUP,client_reregister); + SET_CAP(IB_PORT_SYS_IMAGE_GUID_SUP,sysguid); + SET_CAP(IB_PORT_BOOT_MGMT_SUP,boot_mgmt); + SET_CAP(IB_PORT_DR_NOTICE_SUP,dr_notice); + SET_CAP(IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP,pkey_switch_ext_port); + SET_CAP(IB_PORT_LINK_LATENCY_SUP,link_rtl); + SET_CAP(IB_PORT_REINIT_SUP,reinit); + SET_CAP(IB_PORT_OPT_IPD_SUP,ipd); + SET_CAP(IB_PORT_MKEY_NVRAM,mkey_nvram); + SET_CAP(IB_PORT_PKEY_NVRAM,pkey_nvram); + // there no MTHCA flags for qkey_ctr, pkey_ctr, port_active, bm IBAL capabilities; +} + + +///////////////////////////////////////////////////////// +void +from_hca_cap( + IN struct ib_device *ib_dev, + IN struct ib_device_attr *hca_info_p, + IN struct ib_port_attr *hca_ports, + OUT ib_ca_attr_t *ca_attr_p) +{ + uint8_t port_num; + ib_port_attr_t *ibal_port_p; + struct ib_port_attr *mthca_port_p; + + ca_attr_p->vend_id = hca_info_p->vendor_id; + ca_attr_p->dev_id = (uint16_t)hca_info_p->vendor_part_id; + ca_attr_p->revision = (uint16_t)hca_info_p->hw_ver; + ca_attr_p->fw_ver = hca_info_p->fw_ver; + ca_attr_p->ca_guid = *(UNALIGNED64 uint64_t *)&ib_dev->node_guid; + ca_attr_p->num_ports = ib_dev->phys_port_cnt; + ca_attr_p->max_qps = hca_info_p->max_qp; + ca_attr_p->max_wrs = hca_info_p->max_qp_wr; + ca_attr_p->max_sges = hca_info_p->max_sge; + ca_attr_p->max_rd_sges = hca_info_p->max_sge_rd; + ca_attr_p->max_cqs = hca_info_p->max_cq; + ca_attr_p->max_cqes = hca_info_p->max_cqe; + ca_attr_p->max_pds = hca_info_p->max_pd; + ca_attr_p->init_regions = hca_info_p->max_mr; + ca_attr_p->init_windows = hca_info_p->max_mw; + ca_attr_p->init_region_size = hca_info_p->max_mr_size; + ca_attr_p->max_addr_handles = hca_info_p->max_ah; + ca_attr_p->atomicity = hca_info_p->atomic_cap; + ca_attr_p->max_partitions = hca_info_p->max_pkeys; + ca_attr_p->max_qp_resp_res =(uint8_t) hca_info_p->max_qp_rd_atom; + ca_attr_p->max_resp_res = (uint8_t)hca_info_p->max_res_rd_atom; + ca_attr_p->max_qp_init_depth = (uint8_t)hca_info_p->max_qp_init_rd_atom; + ca_attr_p->max_ipv6_qps = hca_info_p->max_raw_ipv6_qp; + ca_attr_p->max_ether_qps = hca_info_p->max_raw_ethy_qp; + ca_attr_p->max_mcast_grps = hca_info_p->max_mcast_grp; + ca_attr_p->max_mcast_qps = hca_info_p->max_total_mcast_qp_attach; + ca_attr_p->max_qps_per_mcast_grp = hca_info_p->max_mcast_qp_attach; + ca_attr_p->max_fmr = hca_info_p->max_fmr; + ca_attr_p->max_map_per_fmr = hca_info_p->max_map_per_fmr; + ca_attr_p->max_srq = hca_info_p->max_srq; + ca_attr_p->max_srq_wrs = hca_info_p->max_srq_wr; + ca_attr_p->max_srq_sges = hca_info_p->max_srq_sge; + + ca_attr_p->local_ack_delay = hca_info_p->local_ca_ack_delay; + ca_attr_p->bad_pkey_ctr_support = hca_info_p->device_cap_flags & IB_DEVICE_BAD_PKEY_CNTR; + ca_attr_p->bad_qkey_ctr_support = hca_info_p->device_cap_flags & IB_DEVICE_BAD_QKEY_CNTR; + ca_attr_p->raw_mcast_support = hca_info_p->device_cap_flags & IB_DEVICE_RAW_MULTI; + ca_attr_p->apm_support = hca_info_p->device_cap_flags & IB_DEVICE_AUTO_PATH_MIG; + ca_attr_p->av_port_check = hca_info_p->device_cap_flags & IB_DEVICE_UD_AV_PORT_ENFORCE; + ca_attr_p->change_primary_port = hca_info_p->device_cap_flags & IB_DEVICE_CHANGE_PHY_PORT; + ca_attr_p->modify_wr_depth = hca_info_p->device_cap_flags & 
IB_DEVICE_RESIZE_MAX_WR; + ca_attr_p->modify_srq_depth = hca_info_p->device_cap_flags & IB_DEVICE_SRQ_RESIZE; + ca_attr_p->hw_agents = FALSE; // in the context of IBAL then agent is implemented on the host + + ca_attr_p->num_page_sizes = 1; + ca_attr_p->p_page_size[0] = PAGE_SIZE; // TBD: extract an array of page sizes from HCA cap + + for (port_num = 0; port_num <= (end_port(ib_dev) - start_port(ib_dev)); ++port_num) + { + // Setup port pointers + ibal_port_p = &ca_attr_p->p_port_attr[port_num]; + mthca_port_p = &hca_ports[port_num]; + + // Port Cabapilities + cl_memclr(&ibal_port_p->cap, sizeof(ib_port_cap_t)); + from_port_cap(mthca_port_p->port_cap_flags, &ibal_port_p->cap); + + // Port Atributes + ibal_port_p->port_num = (u8)(port_num + start_port(ib_dev)); + ibal_port_p->port_guid = ibal_port_p->p_gid_table[0].unicast.interface_id; + ibal_port_p->lid = cl_ntoh16(mthca_port_p->lid); + ibal_port_p->lmc = mthca_port_p->lmc; + ibal_port_p->max_vls = mthca_port_p->max_vl_num; + ibal_port_p->sm_lid = cl_ntoh16(mthca_port_p->sm_lid); + ibal_port_p->sm_sl = mthca_port_p->sm_sl; + ibal_port_p->link_state = (mthca_port_p->state != 0) ? (uint8_t)mthca_port_p->state : IB_LINK_DOWN; + ibal_port_p->num_gids = (uint16_t)mthca_port_p->gid_tbl_len; + ibal_port_p->num_pkeys = mthca_port_p->pkey_tbl_len; + ibal_port_p->pkey_ctr = (uint16_t)mthca_port_p->bad_pkey_cntr; + ibal_port_p->qkey_ctr = (uint16_t)mthca_port_p->qkey_viol_cntr; + ibal_port_p->max_msg_size = mthca_port_p->max_msg_sz; + ibal_port_p->mtu = (uint8_t)mthca_port_p->max_mtu; + + ibal_port_p->subnet_timeout = mthca_port_p->subnet_timeout; + // ibal_port_p->local_ack_timeout = 3; // TBD: currently ~32 usec + HCA_PRINT(TRACE_LEVEL_VERBOSE, HCA_DBG_SHIM ,("Port %d port_guid 0x%I64x\n", + ibal_port_p->port_num, cl_ntoh64(ibal_port_p->port_guid))); + } +} + +void cq_comp_handler(struct ib_cq *cq, void *context) +{ + mlnx_hca_t *p_hca = (mlnx_hca_t *)context; + struct ib_cq *p_ib_cq =(struct ib_cq *)cq; + HCA_ENTER(HCA_DBG_CQ); + if (p_hca && p_hca->comp_cb_p) { + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_CQ ,("Invoking completion callback\n")); + (p_hca->comp_cb_p)(p_ib_cq->x.ctx); + } + else { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_CQ ,("Incorrect context. Completion callback was not invoked\n")); + } + HCA_EXIT(HCA_DBG_CQ); +} + +void ca_event_handler(struct ib_event *ev, void *context) +{ + mlnx_hca_t *p_hca = (mlnx_hca_t *)context; + ib_event_rec_t event_rec; + + // prepare parameters + event_rec.context = (void *)p_hca->ca_context; + event_rec.trap.info.port_num = ev->element.port_num; + event_rec.type = ev->event; + if (event_rec.type > IB_AE_UNKNOWN) { + // CL_ASSERT(0); // This shouldn't happen + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SHIM,("Unmapped E_EV_CA event of type 0x%x. Replaced by 0x%x (IB_AE_LOCAL_FATAL)\n", + event_rec.type, IB_AE_LOCAL_FATAL)); + event_rec.type = IB_AE_LOCAL_FATAL; + } + + // call the user callback + if (p_hca && p_hca->async_cb_p) + (p_hca->async_cb_p)(&event_rec); + else { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("Incorrect context. 
Async callback was not invoked\n")); + } +} + +void srq_event_handler(struct ib_event *ev, void *context) +{ + mlnx_hca_t *p_hca = (mlnx_hca_t *)context; + ib_event_rec_t event_rec; + struct ib_srq *p_srq; + + // prepare parameters + event_rec.type = ev->event; + // TODO: who fills x.vendor_specific + event_rec.vendor_specific = ev->x.vendor_specific; + p_srq = (struct ib_srq *)ev->element.srq; + event_rec.context = p_srq->srq_context; + + // call the user callback + if (p_hca) + (p_hca->async_cb_p)(&event_rec); + else { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("Incorrect context. Async callback was not invoked\n")); + } +} + + +void qp_event_handler(struct ib_event *ev, void *context) +{ + mlnx_hca_t *p_hca = (mlnx_hca_t *)context; + ib_event_rec_t event_rec; + struct ib_qp *p_ib_qp; + + // prepare parameters + event_rec.type = ev->event; + event_rec.vendor_specific = ev->x.vendor_specific; + p_ib_qp = (struct ib_qp *)ev->element.qp; + event_rec.context = p_ib_qp->x.ctx; + + // call the user callback + if (p_hca) + (p_hca->async_cb_p)(&event_rec); + else { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("Incorrect context. Async callback was not invoked\n")); + } +} + +void cq_event_handler(struct ib_event *ev, void *context) +{ + mlnx_hca_t *p_hca = (mlnx_hca_t *)context; + ib_event_rec_t event_rec; + struct ib_cq *p_ib_cq; + + // prepare parameters + event_rec.type = ev->event; + p_ib_cq = (struct ib_cq *)ev->element.cq; + event_rec.context = p_ib_cq->x.ctx; + + // call the user callback + if (p_hca) + (p_hca->async_cb_p)(&event_rec); + else { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("Incorrect context. Async callback was not invoked\n")); + } +} + +enum ib_rate to_rate(uint8_t rate) +{ + if (rate == IB_PATH_RECORD_RATE_2_5_GBS) return IB_RATE_2_5_GBPS; + if (rate == IB_PATH_RECORD_RATE_5_GBS) return IB_RATE_5_GBPS; + if (rate == IB_PATH_RECORD_RATE_10_GBS) return IB_RATE_10_GBPS; + if (rate == IB_PATH_RECORD_RATE_20_GBS) return IB_RATE_20_GBPS; + if (rate == IB_PATH_RECORD_RATE_30_GBS) return IB_RATE_30_GBPS; + if (rate == IB_PATH_RECORD_RATE_40_GBS) return IB_RATE_40_GBPS; + if (rate == IB_PATH_RECORD_RATE_60_GBS) return IB_RATE_60_GBPS; + if (rate == IB_PATH_RECORD_RATE_80_GBS) return IB_RATE_80_GBPS; + if (rate == IB_PATH_RECORD_RATE_120_GBS) return IB_RATE_120_GBPS; + return IB_RATE_PORT_CURRENT; +} + +uint8_t from_rate(enum ib_rate ib_rate) +{ + if (ib_rate == IB_RATE_2_5_GBPS) return IB_PATH_RECORD_RATE_2_5_GBS; + if (ib_rate == IB_RATE_5_GBPS) return IB_PATH_RECORD_RATE_5_GBS; + if (ib_rate == IB_RATE_10_GBPS) return IB_PATH_RECORD_RATE_10_GBS; + if (ib_rate == IB_RATE_20_GBPS) return IB_PATH_RECORD_RATE_20_GBS; + if (ib_rate == IB_RATE_30_GBPS) return IB_PATH_RECORD_RATE_30_GBS; + if (ib_rate == IB_RATE_40_GBPS) return IB_PATH_RECORD_RATE_40_GBS; + if (ib_rate == IB_RATE_60_GBPS) return IB_PATH_RECORD_RATE_60_GBS; + if (ib_rate == IB_RATE_80_GBPS) return IB_PATH_RECORD_RATE_80_GBS; + if (ib_rate == IB_RATE_120_GBPS) return IB_PATH_RECORD_RATE_120_GBS; + return 0; +} + +int +to_av( + IN const struct ib_device *p_ib_dev, + IN const ib_av_attr_t *p_ib_av_attr, + OUT struct ib_ah_attr *p_ib_ah_attr) +{ + int err = 0; + u8 port_num; + u16 gid_index; + + p_ib_ah_attr->port_num = p_ib_av_attr->port_num; + p_ib_ah_attr->sl = p_ib_av_attr->sl; + p_ib_ah_attr->dlid = cl_ntoh16(p_ib_av_attr->dlid); + p_ib_ah_attr->static_rate = to_rate(p_ib_av_attr->static_rate); + p_ib_ah_attr->src_path_bits = p_ib_av_attr->path_bits; // PATH: + + /* For global destination or Multicast address:*/ 
+ if (p_ib_av_attr->grh_valid) + { + p_ib_ah_attr->ah_flags |= IB_AH_GRH; + p_ib_ah_attr->grh.hop_limit = p_ib_av_attr->grh.hop_limit; + ib_grh_get_ver_class_flow( p_ib_av_attr->grh.ver_class_flow, NULL, + &p_ib_ah_attr->grh.traffic_class, &p_ib_ah_attr->grh.flow_label ); + err = p_ib_dev->x.find_cached_gid((struct ib_device *)p_ib_dev, + (union ib_gid *)p_ib_av_attr->grh.src_gid.raw, &port_num, &gid_index); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM , + ("ib_find_cached_gid failed %d (%#x). Using default: sgid_index = 0\n", err, err)); + gid_index = 0; + } + else if (port_num != p_ib_ah_attr->port_num) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM , + ("ib_find_cached_gid returned wrong port_num %u (Expected - %u). Using the expected.\n", + (u32)port_num, (u32)p_ib_ah_attr->port_num)); + } + p_ib_ah_attr->grh.sgid_index = (u8)gid_index; + RtlCopyMemory(p_ib_ah_attr->grh.dgid.raw, + p_ib_av_attr->grh.dest_gid.raw, sizeof(p_ib_ah_attr->grh.dgid)); + } + + return err; +} + +int from_av( + IN const struct ib_device *p_ib_dev, + IN struct ib_qp_attr *p_ib_qp_attr, + IN struct ib_ah_attr *p_ib_ah_attr, + OUT ib_av_attr_t *p_ib_av_attr) +{ + int err = 0; + + p_ib_av_attr->port_num = p_ib_ah_attr->port_num; + p_ib_av_attr->sl = p_ib_ah_attr->sl; + p_ib_av_attr->dlid = cl_hton16(p_ib_ah_attr->dlid); + p_ib_av_attr->static_rate = from_rate(p_ib_ah_attr->static_rate); + p_ib_av_attr->path_bits = p_ib_ah_attr->src_path_bits; + + if (p_ib_qp_attr) { + p_ib_av_attr->conn.path_mtu = p_ib_qp_attr->path_mtu; // MTU + p_ib_av_attr->conn.local_ack_timeout = p_ib_qp_attr->timeout; // MTU + p_ib_av_attr->conn.seq_err_retry_cnt = p_ib_qp_attr->retry_cnt; // MTU + p_ib_av_attr->conn.rnr_retry_cnt = p_ib_qp_attr->rnr_retry; // MTU + } + + if (p_ib_ah_attr->ah_flags & IB_AH_GRH) { + p_ib_av_attr->grh_valid = TRUE; + p_ib_av_attr->grh.hop_limit = p_ib_ah_attr->grh.hop_limit; + p_ib_av_attr->grh.ver_class_flow = ib_grh_set_ver_class_flow( + 0, p_ib_ah_attr->grh.traffic_class, p_ib_ah_attr->grh.flow_label ); + RtlCopyMemory(p_ib_av_attr->grh.dest_gid.raw, + p_ib_ah_attr->grh.dgid.raw, sizeof(p_ib_av_attr->grh.dest_gid)); + err = p_ib_dev->x.get_cached_gid((struct ib_device *)p_ib_dev, + p_ib_ah_attr->port_num, p_ib_ah_attr->grh.sgid_index, + (union ib_gid*)p_ib_av_attr->grh.src_gid.raw ); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM , + ("ib_get_cached_gid failed %d (%#x). 
Using default: sgid_index = 0\n", err, err)); + } + } + else + p_ib_av_attr->grh_valid = FALSE; + + + return err; +} + +enum ib_access_flags +to_qp_acl( + IN ib_access_t ibal_acl) +{ +#define IBAL_ACL(ifl,mfl) if (ibal_acl & ifl) acc |= mfl + enum ib_access_flags acc = 0; + + IBAL_ACL(IB_AC_RDMA_READ,IB_ACCESS_REMOTE_READ); + IBAL_ACL(IB_AC_RDMA_WRITE,IB_ACCESS_REMOTE_WRITE); + IBAL_ACL(IB_AC_ATOMIC,IB_ACCESS_REMOTE_ATOMIC); + IBAL_ACL(IB_AC_LOCAL_WRITE,IB_ACCESS_LOCAL_WRITE); + IBAL_ACL(IB_AC_MW_BIND,IB_ACCESS_MW_BIND); + + return acc; +} + +ib_access_t +from_qp_acl( + IN enum ib_access_flags acc) +{ +#define IB_ACL(ifl,mfl) if (acc & ifl) ibal_acl |= mfl + ib_access_t ibal_acl = 0; + + IB_ACL(IB_ACCESS_REMOTE_READ,IB_AC_RDMA_READ); + IB_ACL(IB_ACCESS_REMOTE_WRITE,IB_AC_RDMA_WRITE); + IB_ACL(IB_ACCESS_REMOTE_ATOMIC,IB_AC_ATOMIC); + IB_ACL(IB_ACCESS_LOCAL_WRITE,IB_AC_LOCAL_WRITE); + IB_ACL(IB_ACCESS_MW_BIND,IB_AC_MW_BIND); + + return ibal_acl; +} + +static enum ib_qp_state to_qp_state(ib_qp_state_t ib_qps) +{ +#define MAP_XIB_QPS(val1,val2) case val1: qps = val2; break + enum ib_qp_state qps; + switch (ib_qps) { + MAP_XIB_QPS( IB_QPS_RESET, XIB_QPS_RESET ); + MAP_XIB_QPS( IB_QPS_INIT, XIB_QPS_INIT ); + MAP_XIB_QPS( IB_QPS_RTR, XIB_QPS_RTR ); + MAP_XIB_QPS( IB_QPS_RTS, XIB_QPS_RTS ); + MAP_XIB_QPS( IB_QPS_SQD, XIB_QPS_SQD ); + MAP_XIB_QPS( IB_QPS_SQD_DRAINING, XIB_QPS_SQD ); + MAP_XIB_QPS( IB_QPS_SQD_DRAINED, XIB_QPS_SQD ); + MAP_XIB_QPS( IB_QPS_SQERR, XIB_QPS_SQE ); + MAP_XIB_QPS( IB_QPS_ERROR, XIB_QPS_ERR ); + default: + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("Unmapped IBAL qp_state %d\n", ib_qps)); + qps = 0xffffffff; + } + return qps; +} + +static ib_qp_state_t from_qp_state(enum ib_qp_state qps, int draining) +{ +#define MAP_IB_QPS(val1,val2) case val1: ib_qps = val2; break + ib_qp_state_t ib_qps; + + if (qps == XIB_QPS_SQD) { + ib_qps = draining ? 
IB_QPS_SQD_DRAINING : IB_QPS_SQD; + return ib_qps; + } + + switch (qps) { + MAP_IB_QPS( XIB_QPS_RESET, IB_QPS_RESET ); + MAP_IB_QPS( XIB_QPS_INIT, IB_QPS_INIT ); + MAP_IB_QPS( XIB_QPS_RTR, IB_QPS_RTR ); + MAP_IB_QPS( XIB_QPS_RTS, IB_QPS_RTS ); + MAP_IB_QPS( XIB_QPS_SQE, IB_QPS_SQERR ); + MAP_IB_QPS( XIB_QPS_ERR, IB_QPS_ERROR ); + default: + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("Unmapped IBAL qp_state %d\n", qps)); + ib_qps = 0xffffffff; + } + return ib_qps; +} + +ib_api_status_t +to_qp_attr( + IN const struct ib_qp *p_ib_qp, + IN ib_qp_type_t qp_type, + IN const ib_qp_mod_t *p_ib_qp_mod, + OUT struct ib_qp_attr *p_ib_qp_attr, + OUT int *p_qp_attr_mask + ) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct mlx4_ib_qp *p_mib_qp = (struct mlx4_ib_qp *)p_ib_qp; + + RtlZeroMemory( p_ib_qp_attr, sizeof *p_ib_qp_attr ); + *p_qp_attr_mask = IB_QP_STATE; + p_ib_qp_attr->qp_state = to_qp_state( p_ib_qp_mod->req_state ); + + // skipped cases + if (p_mib_qp->state == XIB_QPS_RESET && p_ib_qp_mod->req_state != IB_QPS_INIT) + return IB_NOT_DONE; + + switch (p_ib_qp_mod->req_state) { + case IB_QPS_RESET: + case IB_QPS_ERROR: + case IB_QPS_SQERR: + case IB_QPS_TIME_WAIT: + break; + + case IB_QPS_INIT: + + switch (qp_type) { + case IB_QPT_RELIABLE_CONN: + case IB_QPT_UNRELIABLE_CONN: + *p_qp_attr_mask |= IB_QP_PORT | IB_QP_PKEY_INDEX |IB_QP_ACCESS_FLAGS; + p_ib_qp_attr->qp_access_flags = to_qp_acl(p_ib_qp_mod->state.init.access_ctrl); + break; + case IB_QPT_QP0: + case IB_QPT_QP1: + // TODO: these cases had IB_QP_PORT in mthca + // TODO: they do not pass ib_modify_qp_is_ok control here + *p_qp_attr_mask |= IB_QP_QKEY | IB_QP_PKEY_INDEX ; + p_ib_qp_attr->qkey = cl_ntoh32 (p_ib_qp_mod->state.init.qkey); + break; + case IB_QPT_UNRELIABLE_DGRM: + default: + *p_qp_attr_mask |= IB_QP_PORT | IB_QP_QKEY | IB_QP_PKEY_INDEX ; + p_ib_qp_attr->qkey = cl_ntoh32 (p_ib_qp_mod->state.init.qkey); + break; + } + + // IB_QP_PORT + p_ib_qp_attr->port_num = p_ib_qp_mod->state.init.primary_port; + + // IB_QP_PKEY_INDEX + p_ib_qp_attr->pkey_index = p_ib_qp_mod->state.init.pkey_index; + + break; + + case IB_QPS_RTR: + /* modifying the WQE depth is not supported */ + if( p_ib_qp_mod->state.rtr.opts & IB_MOD_QP_SQ_DEPTH || + p_ib_qp_mod->state.rtr.opts & IB_MOD_QP_RQ_DEPTH ) { + status = IB_UNSUPPORTED; + break; + } + + switch (qp_type) { + case IB_QPT_RELIABLE_CONN: + *p_qp_attr_mask |= /* required flags */ + IB_QP_DEST_QPN |IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | + IB_QP_AV |IB_QP_PATH_MTU | IB_QP_MIN_RNR_TIMER; + + // IB_QP_DEST_QPN + p_ib_qp_attr->dest_qp_num = cl_ntoh32 (p_ib_qp_mod->state.rtr.dest_qp); + + // IB_QP_RQ_PSN + p_ib_qp_attr->rq_psn = cl_ntoh32 (p_ib_qp_mod->state.rtr.rq_psn); + + // IB_QP_MAX_DEST_RD_ATOMIC + p_ib_qp_attr->max_dest_rd_atomic = p_ib_qp_mod->state.rtr.resp_res; + + // IB_QP_AV, IB_QP_PATH_MTU: Convert primary RC AV (mandatory) + err = to_av(p_ib_qp->device, + &p_ib_qp_mod->state.rtr.primary_av, &p_ib_qp_attr->ah_attr); + if (err) { + status = IB_ERROR; + break; + } + p_ib_qp_attr->path_mtu = p_ib_qp_mod->state.rtr.primary_av.conn.path_mtu; // MTU + p_ib_qp_attr->timeout = p_ib_qp_mod->state.rtr.primary_av.conn.local_ack_timeout; // MTU + p_ib_qp_attr->retry_cnt = p_ib_qp_mod->state.rtr.primary_av.conn.seq_err_retry_cnt; // MTU + p_ib_qp_attr->rnr_retry = p_ib_qp_mod->state.rtr.primary_av.conn.rnr_retry_cnt; // MTU + + // IB_QP_MIN_RNR_TIMER, required in RTR, optional in RTS. 
+ p_ib_qp_attr->min_rnr_timer = p_ib_qp_mod->state.rtr.rnr_nak_timeout; + + // IB_QP_ACCESS_FLAGS: Convert Remote Atomic Flags + if (p_ib_qp_mod->state.rtr.opts & IB_MOD_QP_ACCESS_CTRL) { + *p_qp_attr_mask |= IB_QP_ACCESS_FLAGS; /* optional flag */ + p_ib_qp_attr->qp_access_flags = to_qp_acl(p_ib_qp_mod->state.rtr.access_ctrl); + } + + // IB_QP_ALT_PATH: Convert alternate RC AV + if (p_ib_qp_mod->state.rtr.opts & IB_MOD_QP_ALTERNATE_AV) { + *p_qp_attr_mask |= IB_QP_ALT_PATH; /* required flag */ + err = to_av(p_ib_qp->device, + &p_ib_qp_mod->state.rtr.alternate_av, &p_ib_qp_attr->alt_ah_attr); + if (err) { + status = IB_ERROR; + break; + } + p_ib_qp_attr->alt_timeout = p_ib_qp_mod->state.rtr.alternate_av.conn.local_ack_timeout; // XXX: conv + } + + // IB_QP_PKEY_INDEX + if (p_ib_qp_mod->state.rtr.opts & IB_MOD_QP_PKEY) { + *p_qp_attr_mask |= IB_QP_PKEY_INDEX; + p_ib_qp_attr->pkey_index = p_ib_qp_mod->state.rtr.pkey_index; + } + break; + + case IB_QPT_UNRELIABLE_CONN: + *p_qp_attr_mask |= /* required flags */ + IB_QP_DEST_QPN |IB_QP_RQ_PSN | IB_QP_AV | IB_QP_PATH_MTU; + + // IB_QP_DEST_QPN + p_ib_qp_attr->dest_qp_num = cl_ntoh32 (p_ib_qp_mod->state.rtr.dest_qp); + + // IB_QP_RQ_PSN + p_ib_qp_attr->rq_psn = cl_ntoh32 (p_ib_qp_mod->state.rtr.rq_psn); + + // IB_QP_PATH_MTU + p_ib_qp_attr->path_mtu = p_ib_qp_mod->state.rtr.primary_av.conn.path_mtu; + + // IB_QP_AV: Convert primary AV (mandatory) + err = to_av(p_ib_qp->device, + &p_ib_qp_mod->state.rtr.primary_av, &p_ib_qp_attr->ah_attr); + if (err) { + status = IB_ERROR; + break; + } + + // IB_QP_ACCESS_FLAGS: Convert Remote Atomic Flags + if (p_ib_qp_mod->state.rtr.opts & IB_MOD_QP_ACCESS_CTRL) { + *p_qp_attr_mask |= IB_QP_ACCESS_FLAGS; /* optional flag */ + p_ib_qp_attr->qp_access_flags = to_qp_acl(p_ib_qp_mod->state.rtr.access_ctrl); + } + + // IB_QP_ALT_PATH: Convert alternate RC AV + if (p_ib_qp_mod->state.rtr.opts & IB_MOD_QP_ALTERNATE_AV) { + *p_qp_attr_mask |= IB_QP_ALT_PATH; /* required flag */ + err = to_av(p_ib_qp->device, + &p_ib_qp_mod->state.rtr.alternate_av, &p_ib_qp_attr->alt_ah_attr); + if (err) { + status = IB_ERROR; + break; + } + } + + // IB_QP_PKEY_INDEX + if (p_ib_qp_mod->state.rtr.opts & IB_MOD_QP_PKEY) { + *p_qp_attr_mask |= IB_QP_PKEY_INDEX; + p_ib_qp_attr->pkey_index = p_ib_qp_mod->state.rtr.pkey_index; + } + break; + + case IB_QPT_UNRELIABLE_DGRM: + case IB_QPT_QP0: + case IB_QPT_QP1: + default: + // IB_QP_PKEY_INDEX + if (p_ib_qp_mod->state.rtr.opts & IB_MOD_QP_PKEY) { + *p_qp_attr_mask |= IB_QP_PKEY_INDEX; + p_ib_qp_attr->pkey_index = p_ib_qp_mod->state.rtr.pkey_index; + } + + // IB_QP_QKEY + if (p_ib_qp_mod->state.rtr.opts & IB_MOD_QP_QKEY) { + *p_qp_attr_mask |= IB_QP_QKEY; + p_ib_qp_attr->qkey = cl_ntoh32 (p_ib_qp_mod->state.rtr.qkey); + } + break; + + } + break; + + case IB_QPS_RTS: + /* modifying the WQE depth is not supported */ + if( p_ib_qp_mod->state.rts.opts & IB_MOD_QP_SQ_DEPTH || + p_ib_qp_mod->state.rts.opts & IB_MOD_QP_RQ_DEPTH ) + { + status = IB_UNSUPPORTED; + break; + } + + switch (qp_type) { + case IB_QPT_RELIABLE_CONN: + if (p_mib_qp->state != XIB_QPS_RTS) + *p_qp_attr_mask |= /* required flags */ + IB_QP_SQ_PSN |IB_QP_MAX_QP_RD_ATOMIC | IB_QP_TIMEOUT | + IB_QP_RETRY_CNT |IB_QP_RNR_RETRY; + + // IB_QP_MAX_QP_RD_ATOMIC + p_ib_qp_attr->max_rd_atomic = p_ib_qp_mod->state.rts.init_depth; + + // IB_QP_TIMEOUT + p_ib_qp_attr->timeout = p_ib_qp_mod->state.rts.local_ack_timeout; // XXX: conv + + // IB_QP_RETRY_CNT + p_ib_qp_attr->retry_cnt = p_ib_qp_mod->state.rts.retry_cnt; + + // IB_QP_RNR_RETRY + 
p_ib_qp_attr->rnr_retry = p_ib_qp_mod->state.rts.rnr_retry_cnt; + + // IB_QP_MAX_DEST_RD_ATOMIC: Update the responder resources for RDMA/ATOMIC (optional for SQD->RTS) + if (p_ib_qp_mod->state.rts.opts & IB_MOD_QP_RESP_RES) { + *p_qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC; + p_ib_qp_attr->max_dest_rd_atomic = p_ib_qp_mod->state.rts.resp_res; + } + +#ifdef WIN_TO_BE_REMOVED + //TODO: do we need that ? + // Linux patch 4793: PKEY_INDEX is not a legal parameter in the RTR->RTS transition. + + // IB_QP_PKEY_INDEX + if (p_ib_qp_mod->state.rts.opts & IB_MOD_QP_PKEY) { + *p_qp_attr_mask |= IB_QP_PKEY_INDEX; + p_ib_qp_attr->pkey_index = p_ib_qp_mod->state.rts.pkey_index; + } +#endif + + // IB_QP_MIN_RNR_TIMER + if (p_ib_qp_mod->state.rts.opts & IB_MOD_QP_RNR_NAK_TIMEOUT) { + *p_qp_attr_mask |= IB_QP_MIN_RNR_TIMER; + p_ib_qp_attr->min_rnr_timer = p_ib_qp_mod->state.rts.rnr_nak_timeout; + } + + // IB_QP_PATH_MIG_STATE + if (p_ib_qp_mod->state.rts.opts & IB_MOD_QP_APM_STATE) { + *p_qp_attr_mask |= IB_QP_PATH_MIG_STATE; + p_ib_qp_attr->path_mig_state = p_ib_qp_mod->state.rts.apm_state; + } + + // IB_QP_ACCESS_FLAGS + if (p_ib_qp_mod->state.rts.opts & IB_MOD_QP_ACCESS_CTRL) { + *p_qp_attr_mask |= IB_QP_ACCESS_FLAGS; /* optional flags */ + p_ib_qp_attr->qp_access_flags = to_qp_acl(p_ib_qp_mod->state.rts.access_ctrl); + } + + // IB_QP_ALT_PATH: Convert alternate RC AV + if (p_ib_qp_mod->state.rts.opts & IB_MOD_QP_ALTERNATE_AV) { + *p_qp_attr_mask |= IB_QP_ALT_PATH; /* optional flag */ + err = to_av(p_ib_qp->device, + &p_ib_qp_mod->state.rts.alternate_av, &p_ib_qp_attr->alt_ah_attr); + if (err) { + status = IB_ERROR; + break; + } + p_ib_qp_attr->alt_timeout = p_ib_qp_mod->state.rts.alternate_av.conn.local_ack_timeout; // XXX: conv + } + break; + + case IB_QPT_UNRELIABLE_CONN: + if (p_mib_qp->state != XIB_QPS_RTS) + *p_qp_attr_mask |= /* required flags */ + IB_QP_SQ_PSN; + + // IB_QP_MAX_DEST_RD_ATOMIC: Update the responder resources for RDMA/ATOMIC (optional for SQD->RTS) + if (p_ib_qp_mod->state.rts.opts & IB_MOD_QP_RESP_RES) { + *p_qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC; + p_ib_qp_attr->max_dest_rd_atomic = p_ib_qp_mod->state.rts.resp_res; + } + +#ifdef WIN_TO_BE_REMOVED + //TODO: do we need that ? + // Linux patch 4793: PKEY_INDEX is not a legal parameter in the RTR->RTS transition. 
+ + // IB_QP_PKEY_INDEX + if (p_ib_qp_mod->state.rts.opts & IB_MOD_QP_PKEY) { + *p_qp_attr_mask |= IB_QP_PKEY_INDEX; + p_ib_qp_attr->pkey_index = p_ib_qp_mod->state.rts.pkey_index; + } +#endif + + // IB_QP_PATH_MIG_STATE + if (p_ib_qp_mod->state.rts.opts & IB_MOD_QP_APM_STATE) { + *p_qp_attr_mask |= IB_QP_PATH_MIG_STATE; + p_ib_qp_attr->path_mig_state = p_ib_qp_mod->state.rts.apm_state; + } + + // IB_QP_ACCESS_FLAGS + if (p_ib_qp_mod->state.rts.opts & IB_MOD_QP_ACCESS_CTRL) { + *p_qp_attr_mask |= IB_QP_ACCESS_FLAGS; /* optional flags */ + p_ib_qp_attr->qp_access_flags = to_qp_acl(p_ib_qp_mod->state.rts.access_ctrl); + } + + // IB_QP_ALT_PATH: Convert alternate RC AV + if (p_ib_qp_mod->state.rts.opts & IB_MOD_QP_ALTERNATE_AV) { + *p_qp_attr_mask |= IB_QP_ALT_PATH; /* optional flag */ + err = to_av(p_ib_qp->device, + &p_ib_qp_mod->state.rts.alternate_av, &p_ib_qp_attr->alt_ah_attr); + if (err) { + status = IB_ERROR; + break; + } + } + break; + + case IB_QPT_UNRELIABLE_DGRM: + case IB_QPT_QP0: + case IB_QPT_QP1: + default: + if (p_mib_qp->state != XIB_QPS_RTS) + *p_qp_attr_mask |= /* required flags */ + IB_QP_SQ_PSN; + + // IB_QP_QKEY + if (p_ib_qp_mod->state.rts.opts & IB_MOD_QP_QKEY) { + *p_qp_attr_mask |= IB_QP_QKEY; + p_ib_qp_attr->qkey = cl_ntoh32 (p_ib_qp_mod->state.rts.qkey); + } + break; + + break; + + } + + // IB_QP_SQ_PSN: common for all + p_ib_qp_attr->sq_psn = cl_ntoh32 (p_ib_qp_mod->state.rts.sq_psn); + //NB: IB_QP_CUR_STATE flag is not provisioned by IBAL + break; + + case IB_QPS_SQD: + case IB_QPS_SQD_DRAINING: + case IB_QPS_SQD_DRAINED: + *p_qp_attr_mask |= IB_QP_EN_SQD_ASYNC_NOTIFY; + p_ib_qp_attr->en_sqd_async_notify = (u8)p_ib_qp_mod->state.sqd.sqd_event; + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_SHIM ,("IB_QP_EN_SQD_ASYNC_NOTIFY seems like unsupported\n")); + break; + + default: + //NB: is this an error case and we need this message ? What about returning an error ? 
+ HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("Unmapped qp_state %d\n", p_ib_qp_mod->req_state)); + break; + + } + + return status; +} + +enum ib_qp_type to_qp_type(ib_qp_type_t qp_type) +{ +#define MAP_TYPE(val1,val2) case val1: ib_qp_type = val2; break + enum ib_qp_type ib_qp_type; + + switch (qp_type) { + MAP_TYPE( IB_QPT_RELIABLE_CONN, IB_QPT_RC ); + MAP_TYPE( IB_QPT_UNRELIABLE_CONN, IB_QPT_UC ); + MAP_TYPE( IB_QPT_UNRELIABLE_DGRM, IB_QPT_UD ); + MAP_TYPE( IB_QPT_QP0, IB_QPT_SMI ); + MAP_TYPE( IB_QPT_QP1, IB_QPT_GSI ); + MAP_TYPE( IB_QPT_RAW_IPV6, IB_QPT_RAW_IP_V6 ); + MAP_TYPE( IB_QPT_RAW_ETHER, IB_QPT_RAW_ETY ); + default: + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM , + ("Unmapped MLX4 ib_wc_type %d\n", qp_type)); + ib_qp_type = 0xffffffff; + } + return ib_qp_type; +} + +ib_qp_type_t from_qp_type(enum ib_qp_type ib_qp_type) +{ +#define MAP_IB_TYPE(val1,val2) case val1: qp_type = val2; break + ib_qp_type_t qp_type; + + switch (ib_qp_type) { + MAP_IB_TYPE( IB_QPT_RC, IB_QPT_RELIABLE_CONN ); + MAP_IB_TYPE( IB_QPT_UC, IB_QPT_UNRELIABLE_CONN ); + MAP_IB_TYPE( IB_QPT_UD, IB_QPT_UNRELIABLE_DGRM ); + MAP_IB_TYPE( IB_QPT_SMI, IB_QPT_QP0 ); + MAP_IB_TYPE( IB_QPT_GSI, IB_QPT_QP1 ); + MAP_IB_TYPE( IB_QPT_RAW_IP_V6, IB_QPT_RAW_IPV6 ); + MAP_IB_TYPE( IB_QPT_RAW_ETY, IB_QPT_RAW_ETHER ); + default: + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM , + ("Unmapped MLX4 ib_wc_type %d\n", ib_qp_type)); + qp_type = 0xffffffff; + } + return qp_type; +} + +ib_apm_state_t from_apm_state(enum ib_mig_state apm) +{ + if (apm == IB_MIG_MIGRATED) return IB_APM_MIGRATED; + if (apm == IB_MIG_REARM) return IB_APM_REARM; + if (apm == IB_MIG_ARMED) return IB_APM_ARMED; + return 0xffffffff; +} + +ib_api_status_t +from_qp_attr( + IN const struct ib_qp *p_ib_qp, + IN struct ib_qp_attr *p_ib_qp_attr, + OUT ib_qp_attr_t *p_qp_attr + ) +{ + int err; + RtlZeroMemory( p_qp_attr, sizeof *p_qp_attr ); + p_qp_attr->h_pd = (ib_pd_handle_t)p_ib_qp->pd; + p_qp_attr->qp_type = from_qp_type(p_ib_qp->qp_type); + p_qp_attr->access_ctrl = from_qp_acl(p_ib_qp_attr->qp_access_flags); + p_qp_attr->pkey_index = p_ib_qp_attr->pkey_index; + + p_qp_attr->sq_max_inline = p_ib_qp_attr->cap.max_inline_data; + p_qp_attr->sq_depth = p_ib_qp_attr->cap.max_send_wr; + p_qp_attr->rq_depth = p_ib_qp_attr->cap.max_recv_wr; + p_qp_attr->sq_sge = p_ib_qp_attr->cap.max_send_sge; + p_qp_attr->rq_sge = p_ib_qp_attr->cap.max_recv_sge; + p_qp_attr->init_depth = p_ib_qp_attr->max_rd_atomic; + p_qp_attr->resp_res = p_ib_qp_attr->max_dest_rd_atomic; + + p_qp_attr->h_sq_cq = (ib_cq_handle_t)p_ib_qp->send_cq; + p_qp_attr->h_rq_cq = (ib_cq_handle_t)p_ib_qp->recv_cq; + p_qp_attr->h_srq = (ib_srq_handle_t)p_ib_qp->srq; + + p_qp_attr->sq_signaled = !!((struct mlx4_ib_qp *)p_ib_qp)->sq_signal_bits; + + p_qp_attr->state = from_qp_state( p_ib_qp_attr->qp_state, + p_ib_qp_attr->sq_draining); + p_qp_attr->num = cl_hton32(p_ib_qp->qp_num); + p_qp_attr->dest_num = cl_hton32(p_ib_qp_attr->dest_qp_num); + p_qp_attr->qkey = cl_hton32(p_ib_qp_attr->qkey); + + p_qp_attr->sq_psn = cl_hton32(p_ib_qp_attr->sq_psn); + p_qp_attr->rq_psn = cl_hton32(p_ib_qp_attr->rq_psn); + + p_qp_attr->primary_port = p_ib_qp_attr->port_num; + p_qp_attr->alternate_port = p_ib_qp_attr->alt_port_num; + err = from_av( p_ib_qp->device, p_ib_qp_attr, &p_ib_qp_attr->ah_attr, &p_qp_attr->primary_av); + if (err) + goto err_av; + err = from_av( p_ib_qp->device, p_ib_qp_attr, &p_ib_qp_attr->alt_ah_attr, &p_qp_attr->alternate_av); + if (err) + goto err_av; + p_qp_attr->apm_state = 
from_apm_state(p_ib_qp_attr->path_mig_state); + + return IB_SUCCESS; + +err_av: + return errno_to_iberr(err); +} + diff --git a/branches/ConnectX/hw/mlx4/kernel/hca/data.h b/branches/ConnectX/hw/mlx4/kernel/hca/data.h new file mode 100644 index 00000000..d69965b9 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/hca/data.h @@ -0,0 +1,344 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 2004-2005 Mellanox Technologies, Inc. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: hca_data.h 2036 2007-07-25 14:27:12Z leonid $ + */ + +#pragma once + +#include +#include + +extern char mlnx_uvp_lib_name[]; + + +#define MLNX_MAX_HCA 4 +#define MLNX_NUM_HOBKL MLNX_MAX_HCA +#define MLNX_NUM_CB_THR 1 +#define MLNX_SIZE_CB_POOL 256 +#define MLNX_UAL_ALLOC_HCA_UL_RES 1 +#define MLNX_UAL_FREE_HCA_UL_RES 2 + + +// Defines for QP ops +#define MLNX_MAX_NUM_SGE 8 +#define MLNX_MAX_WRS_PER_CHAIN 4 + +#define MLNX_NUM_RESERVED_QPS 16 + +/* + * Completion model. + * 0: No DPC processor assignment + * 1: DPCs per-CQ, processor affinity set at CQ initialization time. + * 2: DPCs per-CQ, processor affinity set at runtime. + * 3: DPCs per-CQ, no processor affinity set. + */ +#define MLNX_COMP_MODEL 3 + +#ifdef DBG +#define VALIDATE_INDEX(index, limit, error, label) \ + { \ + if (index >= limit) \ + { \ + status = error; \ + HCA_PRINT(TRACE_LEVEL_ERROR , g_mlnx_dbg_lvl ,("file %s line %d\n", __FILE__, __LINE__)));\ + goto label; \ + } \ + } +#else +#define VALIDATE_INDEX(index, limit, error, label) +#endif + + + +// Typedefs + +typedef enum { + E_EV_CA=1, + E_EV_QP, + E_EV_CQ, + E_EV_LAST +} ENUM_EVENT_CLASS; + +typedef enum { + E_MR_PHYS=1, + E_MR_SHARED, + E_MR_ANY, + E_MR_INVALID +} ENUM_MR_TYPE; + +/* + * Attribute cache for port info saved to expedite local MAD processing. + * Note that the cache accounts for the worst case GID and PKEY table size + * but is allocated from paged pool, so it's nothing to worry about. 
+ */ + +typedef struct _guid_block +{ + boolean_t valid; + ib_guid_info_t tbl; + +} mlnx_guid_block_t; + +typedef struct _port_info_cache +{ + boolean_t valid; + ib_port_info_t info; + +} mlnx_port_info_cache_t; + +typedef struct _pkey_block +{ + boolean_t valid; + ib_pkey_table_t tbl; + +} mlnx_pkey_block_t; + +typedef struct _sl_vl_cache +{ + boolean_t valid; + ib_slvl_table_t tbl; + +} mlnx_sl_vl_cache_t; + +typedef struct _vl_arb_block +{ + boolean_t valid; + ib_vl_arb_table_t tbl; + +} mlnx_vl_arb_block_t; + +typedef struct _attr_cache +{ + mlnx_guid_block_t guid_block[32]; + mlnx_port_info_cache_t port_info; + mlnx_pkey_block_t pkey_tbl[2048]; + mlnx_sl_vl_cache_t sl_vl; + mlnx_vl_arb_block_t vl_arb[4]; + +} mlnx_cache_t; + +typedef struct _ib_mcast { + ib_gid_t mcast_gid; + struct ib_qp *p_ib_qp; + uint16_t mcast_lid; +} mlnx_mcast_t; + +typedef struct _mlnx_hca_t { + cl_list_item_t list_item; // to include in the HCA chain + net64_t guid; // HCA node Guid + uint32_t hw_ver; // HCA HW version + // HOB + ci_completion_cb_t comp_cb_p; + ci_async_event_cb_t async_cb_p; + const void *ca_context; + void *cl_device_h; + uint32_t index; + cl_async_proc_t *async_proc_mgr_p; +} mlnx_hca_t; + +// Functions +void +setup_ci_interface( + IN const ib_net64_t ca_guid, + IN const int is_livefish, + OUT ci_interface_t *p_interface ); + +void +mlnx_hca_insert( + IN mlnx_hca_t *p_hca ); + +void +mlnx_hca_remove( + IN mlnx_hca_t *p_hca ); + +mlnx_hca_t* +mlnx_hca_from_guid( + IN ib_net64_t guid ); + +/* +void +mlnx_names_from_guid( + IN ib_net64_t guid, + OUT char **hca_name_p, + OUT char **dev_name_p); +*/ + +cl_status_t +mlnx_hcas_init( void ); + +ib_api_status_t +mlnx_set_cb( + IN mlnx_hca_t * p_hca, + IN ci_completion_cb_t comp_cb_p, + IN ci_async_event_cb_t async_cb_p, + IN const void* const ib_context); + +void +mlnx_reset_cb( + IN mlnx_hca_t * p_hca); + +void +from_hca_cap( + IN struct ib_device *ib_dev, + IN struct ib_device_attr *hca_info_p, + IN struct ib_port_attr *hca_ports, + OUT ib_ca_attr_t *ca_attr_p); + +ib_api_status_t +mlnx_local_mad ( + IN const ib_ca_handle_t h_ca, + IN const uint8_t port_num, + IN const ib_av_attr_t *p_src_av_attr, + IN const ib_mad_t *p_mad_in, + OUT ib_mad_t *p_mad_out ); + +ib_api_status_t +fw_access_ctrl( + IN const void* __ptr64 context, + IN const void* __ptr64* const handle_array OPTIONAL, + IN uint32_t num_handles, + IN ib_ci_op_t* const p_ci_op, + IN OUT ci_umv_buf_t *p_umv_buf OPTIONAL); + +void unmap_crspace_for_all( struct ib_ucontext *p_context ); + +void cq_comp_handler(struct ib_cq *cq, void *context); + +void ca_event_handler(struct ib_event *ev, void *context); + +void srq_event_handler(struct ib_event *ev, void *context); + +void qp_event_handler(struct ib_event *ev, void *context); + +void cq_event_handler(struct ib_event *ev, void *context); + +ib_api_status_t +to_qp_attr( + IN const struct ib_qp *ib_qp_p, + IN ib_qp_type_t qp_type, + IN const ib_qp_mod_t *modify_attr_p, + OUT struct ib_qp_attr *qp_attr_p, + OUT int *qp_attr_mask_p + ); + +ib_api_status_t +from_qp_attr( + IN const struct ib_qp *p_ib_qp, + IN struct ib_qp_attr *p_ib_qp_attr, + OUT ib_qp_attr_t *p_qp_attr + ); + +enum ib_qp_type to_qp_type(ib_qp_type_t qp_type); + +ib_qp_type_t from_qp_type(enum ib_qp_type ib_qp_type); + +int +to_av( + IN const struct ib_device *p_ib_dev, + IN const ib_av_attr_t *p_ib_av_attr, + OUT struct ib_ah_attr *p_ib_ah_attr); + +int from_av( + IN const struct ib_device *p_ib_dev, + IN struct ib_qp_attr *p_ib_qp_attr, + IN struct ib_ah_attr *p_ib_ah_attr, + 
OUT ib_av_attr_t *p_ib_av_attr); + +enum ib_access_flags +to_qp_acl( + IN ib_access_t ibal_acl); + +static inline int from_umv_buf(void *dest, ci_umv_buf_t* const p_umv_buf, size_t len) +{ + RtlCopyMemory(dest, p_umv_buf->p_inout_buf, len); + return 0; +} + +static inline int to_umv_buf(ci_umv_buf_t* const p_umv_buf, void *src, size_t len) +{ + if (p_umv_buf->output_size < len) { + p_umv_buf->status = IB_INSUFFICIENT_MEMORY; + p_umv_buf->output_size = 0; + return -EFAULT; + } + RtlCopyMemory(p_umv_buf->p_inout_buf, src, len); + p_umv_buf->status = IB_SUCCESS; + p_umv_buf->output_size = (uint32_t)len; + return 0; +} + + +/* interface */ + +void +mlnx_ca_if( + IN OUT ci_interface_t *p_interface ); + +void +mlnx_pd_if( + IN OUT ci_interface_t *p_interface ); + +void +mlnx_av_if( + IN OUT ci_interface_t *p_interface ); + +void +mlnx_cq_if( + IN OUT ci_interface_t *p_interface ); + +void +mlnx_qp_if( + IN OUT ci_interface_t *p_interface ); + +void +mlnx_srq_if( + IN OUT ci_interface_t *p_interface ); + +void +mlnx_mr_if( + IN OUT ci_interface_t *p_interface ); + +void +mlnx_direct_if( + IN OUT ci_interface_t *p_interface ); + +void +mlnx_mcast_if( + IN OUT ci_interface_t *p_interface ); + +void +mlnx_ca_if_livefish( + IN OUT ci_interface_t *p_interface ); + +void +mlnx_mr_if_livefish( + IN OUT ci_interface_t *p_interface ); + + diff --git a/branches/ConnectX/hw/mlx4/kernel/hca/debug.h b/branches/ConnectX/hw/mlx4/kernel/hca/debug.h new file mode 100644 index 00000000..56bd4f2a --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/hca/debug.h @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: hca_debug.h 1936 2007-02-06 16:04:33Z sleybo $ + */ + + +#pragma once + +#if defined(EVENT_TRACING) +// +// Software Tracing Definitions +// + +#define WPP_CONTROL_GUIDS \ + WPP_DEFINE_CONTROL_GUID(Mlx4HcaCtlGuid,(F8C96A49,AE22,41e9,8025,D7E416884D89), \ + WPP_DEFINE_BIT( HCA_DBG_DEV) \ + WPP_DEFINE_BIT( HCA_DBG_PNP) \ + WPP_DEFINE_BIT( HCA_DBG_INIT) \ + WPP_DEFINE_BIT( HCA_DBG_MAD) \ + WPP_DEFINE_BIT( HCA_DBG_PO) \ + WPP_DEFINE_BIT( HCA_DBG_PD)\ + WPP_DEFINE_BIT( HCA_DBG_CQ) \ + WPP_DEFINE_BIT( HCA_DBG_QP) \ + WPP_DEFINE_BIT( HCA_DBG_MEMORY) \ + WPP_DEFINE_BIT( HCA_DBG_AV) \ + WPP_DEFINE_BIT( HCA_DBG_SRQ) \ + WPP_DEFINE_BIT( HCA_DBG_MCAST) \ + WPP_DEFINE_BIT( HCA_DBG_LOW) \ + WPP_DEFINE_BIT( HCA_DBG_SHIM)) + + +#define WPP_GLOBALLOGGER + + +#define WPP_LEVEL_FLAGS_ENABLED(lvl, flags) (WPP_LEVEL_ENABLED(flags) && WPP_CONTROL(WPP_BIT_ ## flags).Level >= lvl) +#define WPP_LEVEL_FLAGS_LOGGER(lvl,flags) WPP_LEVEL_LOGGER(flags) +#define WPP_FLAG_ENABLED(flags)(WPP_LEVEL_ENABLED(flags) && WPP_CONTROL(WPP_BIT_ ## flags).Level >= TRACE_LEVEL_VERBOSE) +#define WPP_FLAG_LOGGER(flags) WPP_LEVEL_LOGGER(flags) + + +// begin_wpp config +// HCA_ENTER(FLAG); +// HCA_EXIT(FLAG); +// USEPREFIX(HCA_PRINT, "%!STDPREFIX! [MTHCA] :%!FUNC!() :"); +// USESUFFIX(HCA_ENTER, " [MTHCA] :%!FUNC!()["); +// USESUFFIX(HCA_EXIT, " [MTHCA] :%!FUNC!()]"); +// end_wpp + + + +#else + + +#include + +/* + * Debug macros + */ + + +#define HCA_DBG_DEV (1 << 0) +#define HCA_DBG_PNP (1<<1) +#define HCA_DBG_INIT (1 << 2) +#define HCA_DBG_MAD (1 << 3) +#define HCA_DBG_PO (1 << 4) +#define HCA_DBG_PD (1<<5) +#define HCA_DBG_QP (1 << 6) +#define HCA_DBG_CQ (1 << 7) +#define HCA_DBG_MEMORY (1 << 8) +#define HCA_DBG_AV (1<<9) +#define HCA_DBG_SRQ (1 << 10) +#define HCA_DBG_MCAST (1<<11) +#define HCA_DBG_LOW (1 << 12) +#define HCA_DBG_SHIM (1 << 13) + + +#if DBG + +// assignment of _level_ is need to to overcome warning C4127 +#define HCA_PRINT(_level_,_flag_,_msg_) \ + { \ + int __lvl = _level_; \ + if (g.DebugPrintLevel >= (_level_) && \ + (g.DebugPrintFlags & (_flag_))) { \ + DbgPrint ("~%d:[MLX4_HCA] %s() :", KeGetCurrentProcessorNumber(), __FUNCTION__); \ + if(__lvl == TRACE_LEVEL_ERROR) DbgPrint ("***ERROR*** "); \ + DbgPrint _msg_; \ + } \ + } + +#else + +#define HCA_PRINT(lvl ,flags, msg) + +#endif + +#define HCA_ENTER(flags)\ + HCA_PRINT(TRACE_LEVEL_VERBOSE, flags,("[\n")); + +#define HCA_EXIT(flags)\ + HCA_PRINT(TRACE_LEVEL_VERBOSE, flags, ("]\n" )); + + +#endif //EVENT_TRACING + + diff --git a/branches/ConnectX/hw/mlx4/kernel/hca/direct.c b/branches/ConnectX/hw/mlx4/kernel/hca/direct.c new file mode 100644 index 00000000..3ac85e58 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/hca/direct.c @@ -0,0 +1,285 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 2004-2005 Mellanox Technologies, Inc. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: hca_direct.c 1936 2007-02-06 16:04:33Z sleybo $ + */ + + +#include "precomp.h" + +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "direct.tmh" +#endif + + +/* +* Work Request Processing Verbs. +*/ + + +ib_api_status_t +mlnx_post_send ( + IN const ib_qp_handle_t h_qp, + IN ib_send_wr_t *p_send_wr, + OUT ib_send_wr_t **pp_failed ) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct ib_qp *p_ib_qp = (struct ib_qp *)h_qp; + + HCA_ENTER(HCA_DBG_QP); + + err = p_ib_qp->device->post_send(p_ib_qp, p_send_wr, pp_failed ); + if (err) { + if (err == -ENOMEM) + status = IB_INSUFFICIENT_RESOURCES; + else + status = errno_to_iberr(err); + } + + if (status != IB_SUCCESS) + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_QP, + ("post_send failed with status %x\n", status)); + HCA_EXIT(HCA_DBG_QP); + return status; + +} + + +ib_api_status_t +mlnx_post_recv ( + IN const ib_qp_handle_t h_qp, + IN ib_recv_wr_t *p_recv_wr, + OUT ib_recv_wr_t **pp_failed OPTIONAL ) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct ib_qp *p_ib_qp = (struct ib_qp *)h_qp; + + HCA_ENTER(HCA_DBG_QP); + + err = p_ib_qp->device->post_recv(p_ib_qp, p_recv_wr, pp_failed ); + if (err) { + if (err == -ENOMEM) + status = IB_INSUFFICIENT_RESOURCES; + else + status = errno_to_iberr(err); + } + + if (status != IB_SUCCESS) + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_QP, + ("post_recv failed with status %x\n", status)); + HCA_EXIT(HCA_DBG_QP); + return status; + +} + +ib_api_status_t +mlnx_post_srq_recv ( + IN const ib_srq_handle_t h_srq, + IN ib_recv_wr_t *p_recv_wr, + OUT ib_recv_wr_t **pp_failed OPTIONAL ) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct ib_srq *p_ib_srq = (struct ib_srq *)h_srq; + + HCA_ENTER(HCA_DBG_SRQ); + + err = p_ib_srq->device->post_srq_recv(p_ib_srq, p_recv_wr, pp_failed ); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SRQ, + ("post_srq_recv failed (%d)\n", err)); + if (err == -ENOMEM) + status = IB_INSUFFICIENT_RESOURCES; + else + status = errno_to_iberr(err); + } + + if (status != IB_SUCCESS) + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SRQ, + ("post_srq_recv failed with status %x\n", status)); + HCA_EXIT(HCA_DBG_SRQ); + return status; +} + +/* +* Completion Processing and Completion Notification Request Verbs. +*/ + +ib_api_status_t +mlnx_peek_cq( + IN const ib_cq_handle_t h_cq, + OUT uint32_t* const p_n_cqes ) +{ + int err; + ib_api_status_t status; + struct ib_cq *p_ib_cq = (struct ib_cq *)h_cq; + + HCA_ENTER(HCA_DBG_CQ); + + err = p_ib_cq->device->peek_cq ? 
+ p_ib_cq->device->peek_cq(p_ib_cq, *p_n_cqes) : -ENOSYS; + status = errno_to_iberr(err); + if (status != IB_SUCCESS) + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_CQ, + ("ib_peek_cq failed with status %x\n", status)); + + HCA_EXIT(HCA_DBG_CQ); + return status; +} + +ib_api_status_t +mlnx_poll_cq ( + IN const ib_cq_handle_t h_cq, + IN OUT ib_wc_t** const pp_free_wclist, + OUT ib_wc_t** const pp_done_wclist ) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct ib_cq *p_ib_cq = (struct ib_cq *)h_cq; + + HCA_ENTER(HCA_DBG_CQ); + + // sanity checks + if (!pp_free_wclist || !pp_done_wclist || !*pp_free_wclist) { + status = IB_INVALID_PARAMETER; + goto err_invalid_params; + } + + // poll CQ + err = p_ib_cq->device->poll_cq(p_ib_cq, pp_free_wclist, pp_done_wclist); + if (err < 0) + status = errno_to_iberr(err); + else if (!*pp_done_wclist) + status = IB_NOT_FOUND; + +err_invalid_params: + if (status != IB_SUCCESS && status != IB_NOT_FOUND) + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_CQ, + ("mthca_poll_cq_list failed with status %x\n", status)); + HCA_EXIT(HCA_DBG_CQ); + return status; + +} + +ib_api_status_t +mlnx_enable_cq_notify ( + IN const ib_cq_handle_t h_cq, + IN const boolean_t solicited ) +{ + int err; + ib_api_status_t status; + struct ib_cq *p_ib_cq = (struct ib_cq *)h_cq; + + HCA_ENTER(HCA_DBG_CQ); + + err = ib_req_notify_cq(p_ib_cq, + (solicited) ? IB_CQ_SOLICITED : IB_CQ_NEXT_COMP ); + status = errno_to_iberr(err); + if (status != IB_SUCCESS) + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_CQ, + ("ib_req_notify_cq failed with status %x\n", status)); + + HCA_EXIT(HCA_DBG_CQ); + return status; +} + +ib_api_status_t +mlnx_enable_ncomp_cq_notify ( + IN const ib_cq_handle_t h_cq, + IN const uint32_t n_cqes ) +{ + int err; + ib_api_status_t status; + struct ib_cq *p_ib_cq = (struct ib_cq *)h_cq; + + HCA_ENTER(HCA_DBG_CQ); + + err = ib_req_ncomp_notif(p_ib_cq, n_cqes); + status = errno_to_iberr(err); + if (status != IB_SUCCESS) + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_CQ, + ("ib_req_ncomp_notif failed with status %x\n", status)); + + HCA_EXIT(HCA_DBG_CQ); + return status; +} + + +ib_api_status_t +mlnx_bind_mw ( + IN const ib_mw_handle_t h_mw, + IN const ib_qp_handle_t h_qp, + IN ib_bind_wr_t* const p_mw_bind, + OUT net32_t* const p_rkey ) +{ + int err; + ib_api_status_t status; + struct ib_mw *p_ib_mw = (struct ib_mw *)h_mw; + struct ib_qp *p_ib_qp = (struct ib_qp *)h_qp; + struct ib_mw_bind ib_mw_bind; + + UNUSED_PARAM(p_mw_bind); + UNUSED_PARAM(p_rkey); + + HCA_ENTER(HCA_DBG_MEMORY); + + // TODO: convert ib_bind_wr_t to struct ib_mw_bind + + err = p_ib_qp->device->bind_mw ? 
+ p_ib_qp->device->bind_mw(p_ib_qp, p_ib_mw, &ib_mw_bind) : -ENOSYS; + status = errno_to_iberr(err); + + if (status != IB_SUCCESS) + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_CQ, + ("ib_bind_mw failed with status %x\n", status)); + HCA_EXIT(HCA_DBG_MEMORY); + return status; +} + + +void +mlnx_direct_if( + IN OUT ci_interface_t *p_interface ) +{ + p_interface->post_send = mlnx_post_send; + p_interface->post_recv = mlnx_post_recv; + p_interface->post_srq_recv = mlnx_post_srq_recv; + + p_interface->enable_ncomp_cq_notify = mlnx_enable_ncomp_cq_notify; + p_interface->peek_cq = NULL; /* mlnx_peek_cq: Not implemented */ + p_interface->poll_cq = mlnx_poll_cq; + p_interface->enable_cq_notify = mlnx_enable_cq_notify; + + p_interface->bind_mw = mlnx_bind_mw; +} diff --git a/branches/ConnectX/hw/mlx4/kernel/hca/drv.c b/branches/ConnectX/hw/mlx4/kernel/hca/drv.c new file mode 100644 index 00000000..4e9fb2de --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/hca/drv.c @@ -0,0 +1,2669 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: al.c 1611 2006-08-20 14:48:55Z sleybo $ + */ + +#include "precomp.h" +#include +#include + +#if defined(EVENT_TRACING) +#include "drv.tmh" +#endif + +#define DRV_VERSION "1.0" +#define DRV_RELDATE "02/01/2008" + +GLOBALS g; + +/* + * UVP name does not include file extension. For debug builds, UAL + * will append "d.dll". 
For release builds, UAL will append ".dll" + */ +char mlnx_uvp_lib_name[MAX_LIB_NAME] = {"mlx4u"}; + + +static void +__put_ifc( + IN PINTERFACE p_ifc ) +{ + HCA_ENTER( HCA_DBG_PNP ); + p_ifc->InterfaceDereference( p_ifc->Context ); + HCA_EXIT( HCA_DBG_PNP ); +} + +static int __get_dev_info(PFDO_DEVICE_DATA p_fdo, __be64 *node_guid, u32 *hw_id) +{ + struct ib_device_attr device_attr; + struct ib_device *p_ibdev = p_fdo->bus_ib_ifc.p_ibdev; + int err; + + HCA_ENTER( HCA_DBG_PNP ); + if ( hca_is_livefish(p_fdo) ) { + *node_guid = cl_hton64((uint64_t)(ULONG_PTR)p_ibdev); + p_ibdev->node_guid = *node_guid; + *hw_id = 0; + return 0; + } + + err = (p_ibdev->query_device)( p_ibdev, &device_attr ); + if (err) + return err; + + *node_guid = p_ibdev->node_guid; + *hw_id = device_attr.hw_ver; + HCA_EXIT( HCA_DBG_PNP ); + return 0; +} + +#ifndef USE_WDM_FRAMEWORK + +#ifdef ALLOC_PRAGMA +#pragma alloc_text (INIT, DriverEntry) +#pragma alloc_text (PAGE, EvtDeviceAdd) +#pragma alloc_text (PAGE, EvtDriverUnload) +#pragma alloc_text (PAGE, EvtDeviceD0Entry) +#pragma alloc_text (PAGE, EvtDeviceD0Exit) +#pragma alloc_text (PAGE, EvtPrepareHardware) +#pragma alloc_text (PAGE, EvtReleaseHardware) +#endif + +static NTSTATUS +__get_ci_interface( + IN PFDO_DEVICE_DATA p_fdo ) +{ + NTSTATUS status; + IRP *p_irp; + IO_STATUS_BLOCK ioStatus; + IO_STACK_LOCATION *pIoStack; + KEVENT event; + + HCA_ENTER( HCA_DBG_PNP ); + + KeInitializeEvent( &event, NotificationEvent, FALSE ); + + /* Query for the verbs interface. */ + p_irp = IoBuildSynchronousFsdRequest( IRP_MJ_PNP, p_fdo->p_al_dev, + NULL, 0, NULL, &event, &ioStatus ); + if( !p_irp ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("IoBuildSynchronousFsdRequest failed.\n")); + return STATUS_INSUFFICIENT_RESOURCES; + } + + /* Format the IRP. */ + pIoStack = IoGetNextIrpStackLocation( p_irp ); + pIoStack->MinorFunction = IRP_MN_QUERY_INTERFACE; + pIoStack->Parameters.QueryInterface.Version = IB_CI_INTERFACE_VERSION; + pIoStack->Parameters.QueryInterface.Size = sizeof(ib_ci_ifc_t); + pIoStack->Parameters.QueryInterface.Interface = + (INTERFACE*)&p_fdo->ci_ifc; + pIoStack->Parameters.QueryInterface.InterfaceSpecificData = NULL; + pIoStack->Parameters.QueryInterface.InterfaceType = + &GUID_IB_CI_INTERFACE; + p_irp->IoStatus.Status = STATUS_NOT_SUPPORTED; + + /* Send the IRP. */ + status = IoCallDriver( p_fdo->p_al_dev, p_irp ); + if( status == STATUS_PENDING ) + { + KeWaitForSingleObject( &event, Executive, KernelMode, + FALSE, NULL ); + + status = ioStatus.Status; + } + + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR,HCA_DBG_PNP, + ("Query interface for verbs returned %08x.\n", status)); + return status; + } + + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + + +static NTSTATUS +__pnp_notify_target( + IN void *pNotifyStruct, + IN void *context ) +{ + NTSTATUS status = STATUS_SUCCESS; + PFDO_DEVICE_DATA p_fdo = context; + PDEVICE_OBJECT p_dev_obj = WdfDeviceWdmGetDeviceObject(p_fdo->FdoDevice); + TARGET_DEVICE_REMOVAL_NOTIFICATION *pNotify; + + HCA_ENTER( HCA_DBG_PNP ); + + pNotify = (TARGET_DEVICE_REMOVAL_NOTIFICATION*)pNotifyStruct; + + if( IsEqualGUID( &pNotify->Event, &GUID_TARGET_DEVICE_QUERY_REMOVE ) ) + { + if ( p_fdo->state == HCA_REGISTERED) { + /* Release AL's CI interface. */ + p_fdo->ci_ifc.wdm.InterfaceDereference( p_fdo->ci_ifc.wdm.Context ); + p_fdo->state = HCA_IFC_DEREFERENCED; + } + + /* Release AL's file object so that it can unload. 
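The reference was taken by IoGetDeviceObjectPointer in __pnp_notify_ifc and must be dropped before IBAL can be removed.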
*/ + CL_ASSERT( p_fdo->p_al_dev ); + CL_ASSERT( p_fdo->p_al_file_obj ); + CL_ASSERT( p_fdo->p_al_file_obj == pNotify->FileObject ); + if( p_fdo->p_al_file_obj ) { + ObDereferenceObject( p_fdo->p_al_file_obj ); + p_fdo->p_al_file_obj = NULL; + p_fdo->p_al_dev = NULL; + } + } + else if( IsEqualGUID( &pNotify->Event, + &GUID_TARGET_DEVICE_REMOVE_COMPLETE ) ) + { + if (p_fdo->ci_ifc.deregister_ca) { + /* Notify AL that the CA is being removed. */ + p_fdo->ci_ifc.deregister_ca( p_fdo->hca.guid ); + p_fdo->ci_ifc.deregister_ca = NULL; + } + + if ( p_fdo->state == HCA_REGISTERED) { + /* Release AL's CI interface. */ + p_fdo->ci_ifc.wdm.InterfaceDereference( p_fdo->ci_ifc.wdm.Context ); + } + p_fdo->state = HCA_STARTED; + + /* Release AL's file object so that it can unload. */ + if( p_fdo->p_al_file_obj ) + { + ObDereferenceObject( p_fdo->p_al_file_obj ); + p_fdo->p_al_file_obj = NULL; + p_fdo->p_al_dev = NULL; + } + + /* Cancel our target device change registration. */ + if (p_fdo->pnp_target_entry) { + IoUnregisterPlugPlayNotification( p_fdo->pnp_target_entry ); + p_fdo->pnp_target_entry = NULL; + } + + } + else if( IsEqualGUID( &pNotify->Event, + &GUID_TARGET_DEVICE_REMOVE_CANCELLED ) ) + { + /* Cancel our target device change registration. */ + if (p_fdo->pnp_target_entry) { + IoUnregisterPlugPlayNotification( p_fdo->pnp_target_entry ); + p_fdo->pnp_target_entry = NULL; + } + + /* Get the device object pointer for the AL. */ + CL_ASSERT( !p_fdo->p_al_file_obj ); + CL_ASSERT( !p_fdo->p_al_dev ); + /* Get the AL device object. */ + HCA_PRINT( TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM ,("Calling IoGetDeviceObjectPointer.\n")); + status = IoGetDeviceObjectPointer( &p_fdo->al_sym_name, + FILE_ALL_ACCESS, &p_fdo->p_al_file_obj, &p_fdo->p_al_dev ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_SHIM, + ("IoGetDeviceObjectPointer returned %08x.\n", status )); + return STATUS_SUCCESS; + } + + /* Register for removal notification of the IB Fabric root device. */ + status = IoRegisterPlugPlayNotification( + EventCategoryTargetDeviceChange, 0, p_fdo->p_al_file_obj, + p_dev_obj->DriverObject, __pnp_notify_target, p_fdo, + &p_fdo->pnp_target_entry ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("IoRegisterPlugPlayNotification returned %08x.\n", status)); + return status; + } + + CL_ASSERT( p_fdo->state == HCA_IFC_DEREFERENCED ); + if ( p_fdo->state == HCA_IFC_DEREFERENCED) { + /* Release AL's CI interface. 
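(Strictly, re-take it: the query-remove handler dereferenced the interface, and the removal has now been cancelled.)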
*/ + p_fdo->ci_ifc.wdm.InterfaceReference( p_fdo->ci_ifc.wdm.Context ); + p_fdo->state = HCA_REGISTERED; + } + } + + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + + +static ci_interface_t* +__alloc_hca_ifc( + IN PFDO_DEVICE_DATA const p_fdo ) +{ + ci_interface_t *pIfc; + + HCA_ENTER( HCA_DBG_PNP ); + + pIfc = + (ci_interface_t*)ExAllocatePoolWithTag( PagedPool, sizeof(ci_interface_t), MT_TAG_KERNEL ); + if( !pIfc ) + { + HCA_PRINT( TRACE_LEVEL_ERROR,HCA_DBG_PNP, + ("Failed to allocate ci_interface_t (%d bytes).\n", + sizeof(ci_interface_t))); + return NULL; + } + + setup_ci_interface( p_fdo->hca.guid, !!hca_is_livefish(p_fdo), pIfc ); + + pIfc->p_hca_dev = WdfDeviceWdmGetPhysicalDevice(p_fdo->FdoDevice); + pIfc->vend_id = (uint32_t)p_fdo->bus_ib_ifc.pdev->ven_id; + pIfc->dev_id = (uint16_t)p_fdo->bus_ib_ifc.pdev->dev_id; + pIfc->dev_revision = (uint16_t)p_fdo->hca.hw_ver; + + HCA_EXIT( HCA_DBG_PNP ); + return pIfc; +} + +static void +__hca_deregister( + IN PFDO_DEVICE_DATA p_fdo ) +{ + HCA_ENTER( HCA_DBG_PNP ); + + if ( p_fdo->state == HCA_REGISTERED) { + if (p_fdo->ci_ifc.deregister_ca) { + /* Notify AL that the CA is being removed. */ + p_fdo->ci_ifc.deregister_ca( p_fdo->hca.guid ); + p_fdo->ci_ifc.deregister_ca = NULL; + /* Release AL's CI interface. */ + p_fdo->ci_ifc.wdm.InterfaceDereference( p_fdo->ci_ifc.wdm.Context ); + p_fdo->state = HCA_STARTED; + HCA_PRINT( TRACE_LEVEL_INFORMATION ,HCA_DBG_PNP, + ("***** HCA deregistered \n")); + } + } + + HCA_EXIT( HCA_DBG_PNP ); +} + +static NTSTATUS +__hca_register( + IN PFDO_DEVICE_DATA p_fdo ) +{ + NTSTATUS status; + ib_api_status_t ib_status; + ci_interface_t *p_hca_ifc; + + HCA_ENTER( HCA_DBG_PNP ); + + ASSERT( p_fdo->state == HCA_STARTED ); + ASSERT( p_fdo->p_al_dev ); + + /* Get the AL's lower interface. */ + status = __get_ci_interface( p_fdo ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR,HCA_DBG_PNP, + ("__get_ci_interface returned %08x.\n", status)); + goto exit; + } + + /* Allocate and populate our HCA interface structure. */ + p_hca_ifc = __alloc_hca_ifc( p_fdo ); + if( !p_hca_ifc ) + { + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_PNP ,("__alloc_hca_ifc failed.\n")); + status = STATUS_NO_MEMORY; + goto exit; + } + + /* Notify AL that we're available... */ + ib_status = p_fdo->ci_ifc.register_ca( p_hca_ifc ); + ExFreePool( p_hca_ifc ); + if( ib_status != IB_SUCCESS ) + { + p_fdo->ci_ifc.wdm.InterfaceDereference( p_fdo->ci_ifc.wdm.Context ); + status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + + p_fdo->state = HCA_REGISTERED; + HCA_PRINT( TRACE_LEVEL_INFORMATION ,HCA_DBG_PNP, + ("***** HCA registered \n")); +exit: + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + +static NTSTATUS +__pnp_notify_ifc( + IN void *pNotifyStruct, + IN void *context ) +{ + NTSTATUS status = STATUS_SUCCESS; + DEVICE_INTERFACE_CHANGE_NOTIFICATION *pNotify; + PFDO_DEVICE_DATA p_fdo = context; + PDEVICE_OBJECT p_dev_obj = WdfDeviceWdmGetDeviceObject(p_fdo->FdoDevice); + + HCA_ENTER( HCA_DBG_PNP ); + + pNotify = (DEVICE_INTERFACE_CHANGE_NOTIFICATION*)pNotifyStruct; + + if( !IsEqualGUID( &pNotify->Event, &GUID_DEVICE_INTERFACE_ARRIVAL ) ) + goto done; + + /* + * Sanity check. We should only be getting notifications of the + * CI interface exported by AL. 
+ */ + ASSERT( + IsEqualGUID( &pNotify->InterfaceClassGuid, &GUID_IB_CI_INTERFACE ) ); + + if( p_fdo->state != HCA_STARTED ) + { + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_PNP ,("Invalid state: %d\n", p_fdo->state)); + goto done; + } + + /* save symbolic name of IBAL for a case of cancelled IBAL removal */ + if (!p_fdo->al_sym_name.Buffer) { + p_fdo->al_sym_name.Length = pNotify->SymbolicLinkName->Length; + p_fdo->al_sym_name.MaximumLength = pNotify->SymbolicLinkName->MaximumLength; + p_fdo->al_sym_name.Buffer = ExAllocatePoolWithTag( NonPagedPool, + p_fdo->al_sym_name.MaximumLength * sizeof(wchar_t), MT_TAG_KERNEL ); + if (!p_fdo->al_sym_name.Buffer) + { + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_PNP ,("allocation of sym IBAL name failed.\n")); + goto done; + } + RtlCopyUnicodeString( &p_fdo->al_sym_name, pNotify->SymbolicLinkName ); + } + + ASSERT( !p_fdo->p_al_dev ); + ASSERT( !p_fdo->p_al_file_obj ); + + /* Get the AL device object. */ + HCA_PRINT( TRACE_LEVEL_INFORMATION ,HCA_DBG_PNP ,("Calling IoGetDeviceObjectPointer.\n")); + status = IoGetDeviceObjectPointer( pNotify->SymbolicLinkName, + FILE_ALL_ACCESS, &p_fdo->p_al_file_obj, &p_fdo->p_al_dev ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("IoGetDeviceObjectPointer returned %08x.\n", status )); + goto done; + } + + /* Register for removal notification of the IB Fabric root device. */ + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_PNP, + ("Registering for target notifications.\n")); + status = IoRegisterPlugPlayNotification( + EventCategoryTargetDeviceChange, 0, p_fdo->p_al_file_obj, + p_dev_obj->DriverObject, __pnp_notify_target, p_fdo, + &p_fdo->pnp_target_entry ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("IoRegisterPlugPlayNotification returned %08x.\n", status)); + goto err_reg_notify; + } + + status = __hca_register( p_fdo ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("__get_ci_interface returned %08x.\n", status)); + goto err_reg_hca; + } + goto done; + +err_reg_hca: + IoUnregisterPlugPlayNotification( p_fdo->pnp_target_entry ); + p_fdo->pnp_target_entry = NULL; +err_reg_notify: + ObDereferenceObject( p_fdo->p_al_file_obj ); + p_fdo->p_al_file_obj = NULL; + p_fdo->p_al_dev = NULL; +done: + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + +/* Forwards the request to the HCA's PDO. */ +static NTSTATUS +__get_ifc( + IN WDFDEVICE const FdoDevice, + IN const GUID* const p_guid, + IN USHORT size, + IN USHORT Version, + IN OUT PVOID InterfaceSpecificData, + OUT PINTERFACE p_ifc ) +{ + NTSTATUS status; + + HCA_ENTER( HCA_DBG_PNP ); + + status = WdfFdoQueryForInterface( FdoDevice, p_guid, p_ifc, + size, Version, InterfaceSpecificData ); + + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + +static void +__unmap_hca_memory( + IN PFDO_DEVICE_DATA const p_fdo ) +{ + struct pci_dev *pdev = p_fdo->bus_ib_ifc.pdev; + int i; + + HCA_ENTER( HCA_DBG_PNP ); + + for( i = 0; i < HCA_BAR_TYPE_MAX; i++ ) { + if (pdev->bar[i].virt) { + MmUnmapIoSpace( pdev->bar[i].virt, pdev->bar[i].size ); + cl_memclr( &pdev->bar[i], sizeof(hca_bar_t) ); + } + } + + HCA_EXIT( HCA_DBG_PNP ); +} + +/* release the resources, allocated in hca_start */ +static void +__hca_release_resources( + IN WDFDEVICE Device ) +{ + PFDO_DEVICE_DATA p_fdo = FdoGetData(Device); + + HCA_ENTER( HCA_DBG_PNP ); + + switch( p_fdo->state ) + { + case HCA_REGISTERED: + __hca_deregister( p_fdo ); + + /* Fall through. 
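A registered HCA is deregistered from IBAL first and then treated like a started one, so it is still dequeued below.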
*/ + case HCA_STARTED: + /* dequeue HCA */ + mlnx_hca_remove( &p_fdo->hca ); + } + + if (p_fdo->al_sym_name.Buffer) { + ExFreePool( p_fdo->al_sym_name.Buffer ); + p_fdo->al_sym_name.Buffer = NULL; + } + + if( p_fdo->pnp_target_entry ) + { + ASSERT( p_fdo->pnp_ifc_entry ); + IoUnregisterPlugPlayNotification( p_fdo->pnp_target_entry ); + p_fdo->pnp_target_entry = NULL; + } + + if( p_fdo->pnp_ifc_entry ) { + IoUnregisterPlugPlayNotification( p_fdo->pnp_ifc_entry ); + p_fdo->pnp_ifc_entry = NULL; + } + + if( p_fdo->p_al_file_obj ) { + ObDereferenceObject( p_fdo->p_al_file_obj ); + p_fdo->p_al_file_obj = NULL; + } + + __unmap_hca_memory( p_fdo ); + + p_fdo->state = HCA_ADDED; + + HCA_EXIT( HCA_DBG_PNP ); +} + +NTSTATUS +EvtDeviceD0Entry( + IN WDFDEVICE Device, + IN WDF_POWER_DEVICE_STATE PreviousState + ) +{ + PFDO_DEVICE_DATA p_fdo = FdoGetData(Device); + NTSTATUS status = STATUS_SUCCESS; + + UNUSED_PARAM(PreviousState); + HCA_ENTER( HCA_DBG_PNP ); + + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_PNP, ("EvtDeviceD0Entry: PreviousState 0x%x\n", PreviousState)); + + /* Connect to IBAL */ + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_PO, + ("***** Connect to IBAL, IRQL %d\n", KeGetCurrentIrql())); + + if( p_fdo->p_al_dev && p_fdo->state == HCA_STARTED) { + status = __hca_register( p_fdo ); + if( !NT_SUCCESS( status ) ) { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PO, + ("!!! __hca_register failed (%#x) \n", status)); + status = STATUS_UNSUCCESSFUL; + } + } + + HCA_EXIT( HCA_DBG_PNP ); + return STATUS_SUCCESS; +} + +NTSTATUS +EvtDeviceD0Exit( + IN WDFDEVICE Device, + IN WDF_POWER_DEVICE_STATE TargetState + ) +{ + NTSTATUS status; + PFDO_DEVICE_DATA p_fdo = FdoGetData(Device); + + HCA_ENTER( HCA_DBG_PNP ); + + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_PNP, ("EvtDeviceD0Exit: TargetState 0x%x\n", TargetState)); + + switch (TargetState) { + case WdfPowerDeviceD1: /* hopefully, it is STANDBY state */ + case WdfPowerDevicePrepareForHibernation: + if (atomic_read(&p_fdo->usecnt)) { + status = STATUS_UNSUCCESSFUL; + break; + } + /* Fall through. 
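No user contexts are open, so standby/hibernation is handled like any other power-down: deregister the HCA from IBAL.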
*/ + default: + __hca_deregister( p_fdo ); + status = STATUS_SUCCESS; + break; + } + + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + +NTSTATUS +EvtDevicePrepareHardware( + IN WDFDEVICE Device, + IN WDFCMRESLIST ResourcesRaw, + IN WDFCMRESLIST ResourcesTranslated + ) +{ + int err; + NTSTATUS status; + PFDO_DEVICE_DATA p_fdo = FdoGetData(Device); + PDEVICE_OBJECT p_dev_obj = WdfDeviceWdmGetDeviceObject(Device); + BUS_INTERFACE_STANDARD bus_pci_ifc; + + UNUSED_PARAM(ResourcesRaw); + UNUSED_PARAM(ResourcesTranslated); + + HCA_ENTER( HCA_DBG_PNP ); + + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_PNP, ("EvtPrepareHardware: \n")); + + ASSERT(p_dev_obj); + + /* get PCI BUS interface */ + status = __get_ifc( Device, &GUID_BUS_INTERFACE_STANDARD, + sizeof(BUS_INTERFACE_STANDARD), 1, NULL, (PINTERFACE)&bus_pci_ifc); + if( !NT_SUCCESS( status ) ) { + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_PNP, ("Getting PCI BUS interface failed: status=0x%x\n", status)); + return status; + } + RtlCopyMemory( &p_fdo->bus_pci_ifc, &bus_pci_ifc, sizeof(BUS_INTERFACE_STANDARD) ); + p_fdo->bus_pci_ifc_taken = TRUE; + + /* get MLX4_BUS IB interface */ + status = __get_ifc( Device, &MLX4_BUS_IB_INTERFACE_GUID, + sizeof(MLX4_BUS_IB_INTERFACE), 1, NULL, (PINTERFACE)&p_fdo->bus_ib_ifc); + if( !NT_SUCCESS( status ) ) { + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_PNP, ("Getting MLX4 BUS interface failed: status=0x%x\n", status)); + return status; + } + p_fdo->bus_ib_ifc_taken = TRUE; + p_fdo->bus_ib_ifc.p_ibdev->x.p_fdo = p_fdo; + + + /* get node GUID */ + err = __get_dev_info( p_fdo, &p_fdo->hca.guid, &p_fdo->hca.hw_ver ); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW , + ("can't get guid - ib_query_device() failed (%08X)\n", err )); + //TODO: no cleanup on error + return STATUS_INSUFFICIENT_RESOURCES; + } + + /* queue HCA */ + mlnx_hca_insert( &p_fdo->hca ); + + /* + * Change the state since the PnP callback can happen + * before the callback returns. + */ + p_fdo->state = HCA_STARTED; + + /* Register for interface arrival of the IB_AL device. */ + status = IoRegisterPlugPlayNotification( + EventCategoryDeviceInterfaceChange, + PNPNOTIFY_DEVICE_INTERFACE_INCLUDE_EXISTING_INTERFACES, + (void*)&GUID_IB_CI_INTERFACE, p_dev_obj->DriverObject, + __pnp_notify_ifc, p_fdo, &p_fdo->pnp_ifc_entry ); + if( !NT_SUCCESS( status ) ) + { + p_fdo->state = HCA_ADDED; + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("IoRegisterPlugPlayNotification returned %08x.\n", status)); + } + + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + + +NTSTATUS +EvtDeviceReleaseHardware( + IN WDFDEVICE Device, + IN WDFCMRESLIST ResourcesTranslated + ) +{ + PFDO_DEVICE_DATA p_fdo = FdoGetData(Device); + + UNUSED_PARAM(ResourcesTranslated); + + HCA_ENTER( HCA_DBG_PNP ); + + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_PNP, ("EvtReleaseHardware: FdoData=0x%p\n", p_fdo)); + + // release IBBUS resources + __hca_release_resources(Device); + + // release MLX4_BUS resources + if(p_fdo->bus_ib_ifc_taken) { + p_fdo->bus_ib_ifc_taken = FALSE; + __put_ifc( (PINTERFACE)&p_fdo->bus_ib_ifc ); + } + + // release PCI BUS resources + if(p_fdo->bus_pci_ifc_taken) { + p_fdo->bus_pci_ifc_taken = FALSE; + __put_ifc( (PINTERFACE)&p_fdo->bus_pci_ifc ); + } + + HCA_EXIT( HCA_DBG_PNP ); + return STATUS_SUCCESS; +} + +NTSTATUS +EvtDeviceQueryRemove( + IN WDFDEVICE Device + ) +{ + PFDO_DEVICE_DATA p_fdo = FdoGetData(Device); + HCA_ENTER( HCA_DBG_PNP ); + if (atomic_read(&p_fdo->usecnt)) { + DbgPrint( "MLX4: Can't get unloaded. 
%d applications are still in work\n", p_fdo->usecnt); + return STATUS_UNSUCCESSFUL; + } + HCA_EXIT( HCA_DBG_PNP ); + return STATUS_SUCCESS; +} + + +NTSTATUS +EvtDeviceAdd( + IN WDFDRIVER Driver, + IN PWDFDEVICE_INIT DeviceInit + ) +/*++ +Routine Description: + + EvtDeviceAdd is called by the framework in response to AddDevice + call from the PnP manager. We create and initialize a device object to + represent a new instance of mxe bus. + +Arguments: + + Driver - Handle to a framework driver object created in DriverEntry + + DeviceInit - Pointer to a framework-allocated WDFDEVICE_INIT structure. + +Return Value: + + NTSTATUS + +--*/ +{ + WDF_OBJECT_ATTRIBUTES attributes; + NTSTATUS status; + WDFDEVICE device; + PFDO_DEVICE_DATA p_fdo; + WDF_PNPPOWER_EVENT_CALLBACKS Callbacks; + + UNREFERENCED_PARAMETER(Driver); + + PAGED_CODE (); + + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_PNP, ("EvtDeviceAdd: 0x%p\n", Driver)); + // + // register PnP & Power stuff + // + WDF_PNPPOWER_EVENT_CALLBACKS_INIT(&Callbacks); + Callbacks.EvtDevicePrepareHardware = EvtDevicePrepareHardware; + Callbacks.EvtDeviceReleaseHardware = EvtDeviceReleaseHardware; + Callbacks.EvtDeviceQueryRemove = EvtDeviceQueryRemove; + Callbacks.EvtDeviceD0Entry = EvtDeviceD0Entry; + Callbacks.EvtDeviceD0Exit = EvtDeviceD0Exit; + + WdfDeviceInitSetPnpPowerEventCallbacks( DeviceInit, &Callbacks ); + + // + // Initialize all the properties specific to the device. + // Framework has default values for the one that are not + // set explicitly here. So please read the doc and make sure + // you are okay with the defaults. + // + WdfDeviceInitSetDeviceType(DeviceInit, FILE_DEVICE_INFINIBAND); + WdfDeviceInitSetExclusive(DeviceInit, FALSE); + + // + // Initialize attributes structure to specify size and accessor function + // for storing device context. + // + WDF_OBJECT_ATTRIBUTES_INIT_CONTEXT_TYPE(&attributes, FDO_DEVICE_DATA); + + // + // Create a framework device object. In response to this call, framework + // creates a WDM deviceobject. + // + status = WdfDeviceCreate(&DeviceInit, &attributes, &device); + if (!NT_SUCCESS(status)) { + HCA_PRINT(TRACE_LEVEL_VERBOSE, HCA_DBG_PNP, ("EvtDeviceAdd: WdfDeviceCreate failed with 0x%x\n", status)); + goto end; + } + + // + // Init device context. + // + p_fdo = FdoGetData(device); + RtlZeroMemory(p_fdo, sizeof(FDO_DEVICE_DATA)); + p_fdo->FdoDevice = device; + spin_lock_init( &p_fdo->uctx_lock ); + cl_qlist_init( &p_fdo->uctx_list ); + atomic_set(&p_fdo->usecnt, 0); + p_fdo->state = HCA_ADDED; + + // + // WMI + // + status = WmiRegistration(device); + if (!NT_SUCCESS(status)) { + return status; + } + + status = STATUS_SUCCESS; + +end: + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + + +void +EvtDriverUnload( + IN WDFDRIVER Driver + ) +{ + HCA_ENTER( HCA_DBG_PNP ); + + UNUSED_PARAM( Driver ); + + HCA_EXIT( HCA_DBG_PNP ); +#if defined(EVENT_TRACING) + WPP_CLEANUP(WdfDriverWdmGetDriverObject(Driver)); +#endif +} + +NTSTATUS +DriverEntry( + IN PDRIVER_OBJECT DriverObject, + IN PUNICODE_STRING RegistryPath + ) +/*++ +Routine Description: + + Initialize the call backs structure of Driver Framework. + +Arguments: + + DriverObject - pointer to the driver object + + RegistryPath - pointer to a unicode string representing the path, + to driver-specific key in the registry. 
+ +Return Value: + + NT Status Code + +--*/ +{ + WDF_DRIVER_CONFIG config; + NTSTATUS status; + WDFDRIVER hDriver; + +#if defined(EVENT_TRACING) + WPP_INIT_TRACING(DriverObject, RegistryPath); +#endif + + // global initializations + g.DebugPrintLevel = TRACE_LEVEL_VERBOSE; + g.DebugPrintFlags = 0xffff; + HCA_ENTER( HCA_DBG_PNP ); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_PNP, + ("Built %s %s, Version %s, RelDate %s\n", + __DATE__, __TIME__, DRV_VERSION, DRV_RELDATE)); + status = mlnx_hcas_init(); + if( status != STATUS_SUCCESS ) { + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_PNP , + ("mlnx_hcas_init returned %#x.\n", status)); + goto end; + } + + // + // Initiialize driver config to control the attributes that + // are global to the driver. Note that framework by default + // provides a driver unload routine. If you create any resources + // in the DriverEntry and want to be cleaned in driver unload, + // you can override that by specifing one in the Config structure. + // + + WDF_DRIVER_CONFIG_INIT( + &config, EvtDeviceAdd ); + config.EvtDriverUnload = EvtDriverUnload; + + // + // Create a framework driver object to represent our driver. + // + status = WdfDriverCreate(DriverObject, + RegistryPath, WDF_NO_OBJECT_ATTRIBUTES, + &config, &hDriver); + + if (!NT_SUCCESS(status)) { + HCA_PRINT(TRACE_LEVEL_VERBOSE, HCA_DBG_PNP, ("WdfDriverCreate failed with status 0x%x\n", status)); + goto end; + } + + // + // read registry parameters + // + { + DECLARE_CONST_UNICODE_STRING(valueName0, L"DebugLevel"); + DECLARE_CONST_UNICODE_STRING(valueName1, L"DebugFlags"); + ULONG value; + WDFKEY hKey = NULL; + + status = WdfDriverOpenParametersRegistryKey( hDriver, + STANDARD_RIGHTS_ALL, WDF_NO_OBJECT_ATTRIBUTES, &hKey ); + + if (NT_SUCCESS (status)) { + + status = WdfRegistryQueryULong(hKey, &valueName0, &value); + if (NT_SUCCESS (status)) g.DebugPrintLevel = value; + + status = WdfRegistryQueryULong(hKey, &valueName1, &value); + if (NT_SUCCESS (status)) g.DebugPrintFlags = value; + + WdfRegistryClose(hKey); + } + + // we don't matter the failure in the work with Registry + status = STATUS_SUCCESS; + } + +end: + HCA_EXIT( HCA_DBG_PNP ); + return status; + +} + +#else + + +UNICODE_STRING g_param_path; +static cl_vfptr_pnp_po_t vfptrHcaPnp; + +NTSTATUS +DriverEntry( + IN PDRIVER_OBJECT p_driver_obj, + IN PUNICODE_STRING p_registry_path ); + +static NTSTATUS +__read_registry( + IN UNICODE_STRING* const p_Param_Path ); + +static void +hca_drv_unload( + IN PDRIVER_OBJECT p_driver_obj ); + +static NTSTATUS +hca_sysctl( + IN PDEVICE_OBJECT p_dev_obj, + IN PIRP p_irp ); + +NTSTATUS +hca_add_device( + IN PDRIVER_OBJECT pDriverObj, + IN PDEVICE_OBJECT pPdo ); + +static NTSTATUS +hca_start( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); + +static NTSTATUS +hca_query_stop( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); + +static NTSTATUS +hca_stop( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); + +static NTSTATUS +hca_cancel_stop( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); + +static NTSTATUS +hca_query_remove( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); + +static void +hca_release_resources( + IN DEVICE_OBJECT* const p_dev_obj ); + +static NTSTATUS +hca_cancel_remove( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* 
const p_action ); + +static NTSTATUS +hca_surprise_remove( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); + +static NTSTATUS +hca_query_capabilities( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); + +static NTSTATUS +hca_query_pnp_state( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); + +static NTSTATUS +hca_query_bus_relations( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); + +static NTSTATUS +hca_query_removal_relations( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); + +static NTSTATUS +hca_query_power( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); + +static NTSTATUS +hca_set_power( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); + +static ci_interface_t* +__alloc_hca_ifc( + IN FDO_DEVICE_DATA* const p_fdo ); + +static NTSTATUS +__get_ci_interface( + IN DEVICE_OBJECT* const p_dev_obj ); + +static void +__hca_deregister( + IN FDO_DEVICE_DATA *p_fdo ); + +static NTSTATUS +__hca_register( + IN DEVICE_OBJECT *p_dev_obj ); + +static NTSTATUS +__pnp_notify_target( + IN void *pNotifyStruct, + IN void *context ); + +static NTSTATUS +__pnp_notify_ifc( + IN void *pNotifyStruct, + IN void *context ); + + +#ifdef ALLOC_PRAGMA +#pragma alloc_text (INIT, DriverEntry) +#pragma alloc_text (INIT, __read_registry) +#pragma alloc_text (PAGE, hca_drv_unload) +#pragma alloc_text (PAGE, hca_sysctl) +#pragma alloc_text (PAGE, hca_add_device) +#pragma alloc_text (PAGE, hca_start) +#pragma alloc_text (PAGE, hca_query_stop) +#pragma alloc_text (PAGE, hca_stop) +#pragma alloc_text (PAGE, hca_cancel_stop) +#pragma alloc_text (PAGE, hca_query_remove) +#pragma alloc_text (PAGE, hca_release_resources) +#pragma alloc_text (PAGE, hca_cancel_remove) +#pragma alloc_text (PAGE, hca_surprise_remove) +#pragma alloc_text (PAGE, hca_query_capabilities) +#pragma alloc_text (PAGE, hca_query_pnp_state) +#pragma alloc_text (PAGE, hca_query_bus_relations) +#pragma alloc_text (PAGE, hca_query_removal_relations) +#pragma alloc_text (PAGE, hca_set_power) +#pragma alloc_text (PAGE, __alloc_hca_ifc) +#pragma alloc_text (PAGE, __get_ci_interface) +#pragma alloc_text (PAGE, __hca_register) +#pragma alloc_text (PAGE, __pnp_notify_target) +#pragma alloc_text (PAGE, __pnp_notify_ifc) +#endif + + +NTSTATUS +hca_add_device( + IN PDRIVER_OBJECT pDriverObj, + IN PDEVICE_OBJECT pPdo ) +{ + NTSTATUS status; + DEVICE_OBJECT *p_dev_obj, *pNextDevObj; + PFDO_DEVICE_DATA p_fdo; + + HCA_ENTER(HCA_DBG_PNP); + + /* + * Create the device so that we have a device extension to store stuff in. + */ + status = IoCreateDevice( pDriverObj, sizeof(FDO_DEVICE_DATA), + NULL, FILE_DEVICE_INFINIBAND, FILE_DEVICE_SECURE_OPEN, + FALSE, &p_dev_obj ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("IoCreateDevice returned 0x%08X.\n", status)); + return status; + } + + p_fdo = (PFDO_DEVICE_DATA)p_dev_obj->DeviceExtension; + cl_memclr( p_fdo, sizeof(FDO_DEVICE_DATA) ); + cl_spinlock_init( &p_fdo->uctx_lock ); + cl_qlist_init( &p_fdo->uctx_list ); + atomic_set(&p_fdo->usecnt, 0); + + /* Attach to the device stack. 
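IoAttachDeviceToDeviceStack returns the device object directly below this FDO; the complib extension uses it to forward IRPs down the stack.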
*/ + pNextDevObj = IoAttachDeviceToDeviceStack( p_dev_obj, pPdo ); + if( !pNextDevObj ) + { + //cl_event_destroy( &p_fdo->mutex ); + IoDeleteDevice( p_dev_obj ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("IoAttachDeviceToDeviceStack failed.\n")); + return STATUS_NO_SUCH_DEVICE; + } + + /* Inititalize the complib extension. */ + cl_init_pnp_po_ext( p_dev_obj, pNextDevObj, pPdo, 0, + &vfptrHcaPnp, NULL ); + + p_fdo->state = HCA_ADDED; + + HCA_EXIT(HCA_DBG_PNP); + return status; +} + +/* Forwards the request to the HCA's PDO. */ +static NTSTATUS +__get_ifc( + IN DEVICE_OBJECT* const pDevObj, + IN const GUID* const pGuid, + IN USHORT size, + IN USHORT Version, + IN OUT PVOID InterfaceSpecificData, + OUT PINTERFACE pBusIfc ) +{ + NTSTATUS status; + IRP *pIrp; + IO_STATUS_BLOCK ioStatus; + IO_STACK_LOCATION *pIoStack; + DEVICE_OBJECT *pDev; + KEVENT event; + + HCA_ENTER( HCA_DBG_PNP ); + + CL_ASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); + + pDev = IoGetAttachedDeviceReference( pDevObj ); + + KeInitializeEvent( &event, NotificationEvent, FALSE ); + + /* Build the IRP for the HCA. */ + pIrp = IoBuildSynchronousFsdRequest( IRP_MJ_PNP, pDev, + NULL, 0, NULL, &event, &ioStatus ); + if( !pIrp ) + { + ObDereferenceObject( pDev ); + HCA_PRINT( TRACE_LEVEL_ERROR,HCA_DBG_PNP, + ("IoBuildSynchronousFsdRequest failed.\n")); + return STATUS_INSUFFICIENT_RESOURCES; + } + + /* Copy the request query parameters. */ + pIoStack = IoGetNextIrpStackLocation( pIrp ); + pIoStack->MinorFunction = IRP_MN_QUERY_INTERFACE; + pIoStack->Parameters.QueryInterface.Size = size; + pIoStack->Parameters.QueryInterface.Version = Version; + pIoStack->Parameters.QueryInterface.InterfaceType = pGuid; + pIoStack->Parameters.QueryInterface.Interface = (INTERFACE*)pBusIfc; + pIoStack->Parameters.QueryInterface.InterfaceSpecificData = InterfaceSpecificData; + + pIrp->IoStatus.Status = STATUS_NOT_SUPPORTED; + + /* Send the IRP. */ + status = IoCallDriver( pDev, pIrp ); + if( status == STATUS_PENDING ) + { + KeWaitForSingleObject( &event, Executive, KernelMode, + FALSE, NULL ); + + status = ioStatus.Status; + } + ObDereferenceObject( pDev ); + + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + +static NTSTATUS +__get_ci_interface( + IN DEVICE_OBJECT* const p_dev_obj ) +{ + NTSTATUS status; + IRP *p_irp; + PFDO_DEVICE_DATA p_fdo; + IO_STATUS_BLOCK ioStatus; + IO_STACK_LOCATION *pIoStack; + KEVENT event; + + HCA_ENTER( HCA_DBG_PNP ); + + p_fdo = (PFDO_DEVICE_DATA)p_dev_obj->DeviceExtension; + + KeInitializeEvent( &event, NotificationEvent, FALSE ); + + /* Query for the verbs interface. */ + p_irp = IoBuildSynchronousFsdRequest( IRP_MJ_PNP, p_fdo->p_al_dev, + NULL, 0, NULL, &event, &ioStatus ); + if( !p_irp ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("IoBuildSynchronousFsdRequest failed.\n")); + return STATUS_INSUFFICIENT_RESOURCES; + } + + /* Format the IRP. */ + pIoStack = IoGetNextIrpStackLocation( p_irp ); + pIoStack->MinorFunction = IRP_MN_QUERY_INTERFACE; + pIoStack->Parameters.QueryInterface.Version = IB_CI_INTERFACE_VERSION; + pIoStack->Parameters.QueryInterface.Size = sizeof(ib_ci_ifc_t); + pIoStack->Parameters.QueryInterface.Interface = + (INTERFACE*)&p_fdo->ci_ifc; + pIoStack->Parameters.QueryInterface.InterfaceSpecificData = NULL; + pIoStack->Parameters.QueryInterface.InterfaceType = + &GUID_IB_CI_INTERFACE; + p_irp->IoStatus.Status = STATUS_NOT_SUPPORTED; + + /* Send the IRP. 
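The request is synchronous: if the lower driver returns STATUS_PENDING, wait on the event and take the final status from the I/O status block.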
*/ + status = IoCallDriver( p_fdo->p_al_dev, p_irp ); + if( status == STATUS_PENDING ) + { + KeWaitForSingleObject( &event, Executive, KernelMode, + FALSE, NULL ); + + status = ioStatus.Status; + } + + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR,HCA_DBG_PNP, + ("Query interface for verbs returned %08x.\n", status)); + return status; + } + + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + + +static NTSTATUS +__pnp_notify_target( + IN void *pNotifyStruct, + IN void *context ) +{ + NTSTATUS status = STATUS_SUCCESS; + DEVICE_OBJECT *p_dev_obj; + PFDO_DEVICE_DATA p_fdo; + TARGET_DEVICE_REMOVAL_NOTIFICATION *pNotify; + + HCA_ENTER( HCA_DBG_PNP ); + + pNotify = (TARGET_DEVICE_REMOVAL_NOTIFICATION*)pNotifyStruct; + p_dev_obj = (DEVICE_OBJECT*)context; + p_fdo = (PFDO_DEVICE_DATA)p_dev_obj->DeviceExtension; + + if( IsEqualGUID( &pNotify->Event, &GUID_TARGET_DEVICE_QUERY_REMOVE ) ) + { + if ( p_fdo->state == HCA_REGISTERED) { + /* Release AL's CI interface. */ + p_fdo->ci_ifc.wdm.InterfaceDereference( p_fdo->ci_ifc.wdm.Context ); + p_fdo->state = HCA_IFC_DEREFERENCED; + } + + /* Release AL's file object so that it can unload. */ + CL_ASSERT( p_fdo->p_al_dev ); + CL_ASSERT( p_fdo->p_al_file_obj ); + CL_ASSERT( p_fdo->p_al_file_obj == pNotify->FileObject ); + if( p_fdo->p_al_file_obj ) { + ObDereferenceObject( p_fdo->p_al_file_obj ); + p_fdo->p_al_file_obj = NULL; + p_fdo->p_al_dev = NULL; + } + } + else if( IsEqualGUID( &pNotify->Event, + &GUID_TARGET_DEVICE_REMOVE_COMPLETE ) ) + { + if (p_fdo->ci_ifc.deregister_ca) { + /* Notify AL that the CA is being removed. */ + p_fdo->ci_ifc.deregister_ca( p_fdo->hca.guid ); + p_fdo->ci_ifc.deregister_ca = NULL; + } + + if ( p_fdo->state == HCA_REGISTERED) { + /* Release AL's CI interface. */ + p_fdo->ci_ifc.wdm.InterfaceDereference( p_fdo->ci_ifc.wdm.Context ); + } + p_fdo->state = HCA_STARTED; + + /* Release AL's file object so that it can unload. */ + if( p_fdo->p_al_file_obj ) + { + ObDereferenceObject( p_fdo->p_al_file_obj ); + p_fdo->p_al_file_obj = NULL; + p_fdo->p_al_dev = NULL; + } + + /* Cancel our target device change registration. */ + if (p_fdo->pnp_target_entry) { + IoUnregisterPlugPlayNotification( p_fdo->pnp_target_entry ); + p_fdo->pnp_target_entry = NULL; + } + + } + else if( IsEqualGUID( &pNotify->Event, + &GUID_TARGET_DEVICE_REMOVE_CANCELLED ) ) + { + /* Cancel our target device change registration. */ + if (p_fdo->pnp_target_entry) { + IoUnregisterPlugPlayNotification( p_fdo->pnp_target_entry ); + p_fdo->pnp_target_entry = NULL; + } + + /* Get the device object pointer for the AL. */ + CL_ASSERT( !p_fdo->p_al_file_obj ); + CL_ASSERT( !p_fdo->p_al_dev ); + /* Get the AL device object. */ + HCA_PRINT( TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM ,("Calling IoGetDeviceObjectPointer.\n")); + status = IoGetDeviceObjectPointer( &p_fdo->al_sym_name, + FILE_ALL_ACCESS, &p_fdo->p_al_file_obj, &p_fdo->p_al_dev ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_SHIM, + ("IoGetDeviceObjectPointer returned %08x.\n", status )); + return STATUS_SUCCESS; + } + + /* Register for removal notification of the IB Fabric root device. 
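This restores the target-device registration cancelled above, so a subsequent IBAL removal is still observed.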
*/ + status = IoRegisterPlugPlayNotification( + EventCategoryTargetDeviceChange, 0, p_fdo->p_al_file_obj, + p_dev_obj->DriverObject, __pnp_notify_target, p_dev_obj, + &p_fdo->pnp_target_entry ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("IoRegisterPlugPlayNotification returned %08x.\n", status)); + return status; + } + + CL_ASSERT( p_fdo->state == HCA_IFC_DEREFERENCED ); + if ( p_fdo->state == HCA_IFC_DEREFERENCED) { + /* Release AL's CI interface. */ + p_fdo->ci_ifc.wdm.InterfaceReference( p_fdo->ci_ifc.wdm.Context ); + p_fdo->state = HCA_REGISTERED; + } + } + + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + +static ci_interface_t* +__alloc_hca_ifc( + IN PFDO_DEVICE_DATA const p_fdo ) +{ + ci_interface_t *pIfc; + + HCA_ENTER( HCA_DBG_PNP ); + + pIfc = (ci_interface_t*)ExAllocatePoolWithTag( PagedPool, + sizeof(ci_interface_t), MT_TAG_KERNEL ); + if( !pIfc ) + { + HCA_PRINT( TRACE_LEVEL_ERROR,HCA_DBG_PNP, + ("Failed to allocate ci_interface_t (%d bytes).\n", + sizeof(ci_interface_t))); + return NULL; + } + + setup_ci_interface( p_fdo->hca.guid, !!hca_is_livefish(p_fdo), pIfc ); + + pIfc->p_hca_dev = p_fdo->cl_ext.p_pdo; + pIfc->vend_id = (uint32_t)p_fdo->bus_ib_ifc.pdev->ven_id; + pIfc->dev_id = (uint16_t)p_fdo->bus_ib_ifc.pdev->dev_id; + pIfc->dev_revision = (uint16_t)p_fdo->hca.hw_ver; + + HCA_EXIT( HCA_DBG_PNP ); + return pIfc; +} + +static void +__hca_deregister( + IN PFDO_DEVICE_DATA p_fdo ) +{ + HCA_ENTER( HCA_DBG_PNP ); + + if ( p_fdo->state == HCA_REGISTERED) { + if (p_fdo->ci_ifc.deregister_ca) { + /* Notify AL that the CA is being removed. */ + p_fdo->ci_ifc.deregister_ca( p_fdo->hca.guid ); + p_fdo->ci_ifc.deregister_ca = NULL; + /* Release AL's CI interface. */ + p_fdo->ci_ifc.wdm.InterfaceDereference( p_fdo->ci_ifc.wdm.Context ); + p_fdo->state = HCA_STARTED; + HCA_PRINT( TRACE_LEVEL_INFORMATION ,HCA_DBG_PNP, + ("***** HCA deregistered \n")); + } + } + + HCA_EXIT( HCA_DBG_PNP ); +} + +static NTSTATUS +__hca_register( + IN DEVICE_OBJECT *p_dev_obj ) +{ + PFDO_DEVICE_DATA p_fdo; + NTSTATUS status; + ib_api_status_t ib_status; + ci_interface_t *p_hca_ifc; + + HCA_ENTER( HCA_DBG_PNP ); + + p_fdo = (PFDO_DEVICE_DATA)p_dev_obj->DeviceExtension; + + ASSERT( p_fdo->state == HCA_STARTED ); + ASSERT( p_fdo->p_al_dev ); + + /* Get the AL's lower interface. */ + status = __get_ci_interface( p_dev_obj ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR,HCA_DBG_PNP, + ("__get_ci_interface returned %08x.\n", status)); + goto exit; + } + + /* Allocate and populate our HCA interface structure. */ + p_hca_ifc = __alloc_hca_ifc( p_fdo ); + if( !p_hca_ifc ) + { + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_PNP ,("__alloc_hca_ifc failed.\n")); + status = STATUS_NO_MEMORY; + goto exit; + } + + /* Notify AL that we're available... 
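register_ca keeps its own copy of the interface, which is why the temporary allocation is freed immediately after the call.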
*/ + ib_status = p_fdo->ci_ifc.register_ca( p_hca_ifc ); + ExFreePool( p_hca_ifc ); + if( ib_status != IB_SUCCESS ) + { + p_fdo->ci_ifc.wdm.InterfaceDereference( p_fdo->ci_ifc.wdm.Context ); + status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + + p_fdo->state = HCA_REGISTERED; + HCA_PRINT( TRACE_LEVEL_INFORMATION ,HCA_DBG_PNP, + ("***** HCA registered \n")); +exit: + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + + +static NTSTATUS +__pnp_notify_ifc( + IN void *pNotifyStruct, + IN void *context ) +{ + NTSTATUS status = STATUS_SUCCESS; + DEVICE_OBJECT *p_dev_obj; + PFDO_DEVICE_DATA p_fdo; + DEVICE_INTERFACE_CHANGE_NOTIFICATION *pNotify; + + HCA_ENTER( HCA_DBG_PNP ); + + pNotify = (DEVICE_INTERFACE_CHANGE_NOTIFICATION*)pNotifyStruct; + p_dev_obj = (DEVICE_OBJECT*)context; + p_fdo = (PFDO_DEVICE_DATA)p_dev_obj->DeviceExtension; + + if( !IsEqualGUID( &pNotify->Event, &GUID_DEVICE_INTERFACE_ARRIVAL ) ) + goto done; + + /* + * Sanity check. We should only be getting notifications of the + * CI interface exported by AL. + */ + ASSERT( + IsEqualGUID( &pNotify->InterfaceClassGuid, &GUID_IB_CI_INTERFACE ) ); + + if( p_fdo->state != HCA_STARTED ) + { + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_PNP ,("Invalid state: %d\n", p_fdo->state)); + goto done; + } + + /* save symbolic name of IBAL for a case of cancelled IBAL removal */ + if (!p_fdo->al_sym_name.Buffer) { + p_fdo->al_sym_name.Length = pNotify->SymbolicLinkName->Length; + p_fdo->al_sym_name.MaximumLength = pNotify->SymbolicLinkName->MaximumLength; + p_fdo->al_sym_name.Buffer = ExAllocatePoolWithTag( NonPagedPool, + p_fdo->al_sym_name.MaximumLength * sizeof(wchar_t), + 'cfin' ); + if (!p_fdo->al_sym_name.Buffer) + { + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_PNP ,("allocation of sym IBAL name failed.\n")); + goto done; + } + RtlCopyUnicodeString( &p_fdo->al_sym_name, pNotify->SymbolicLinkName ); + } + + ASSERT( !p_fdo->p_al_dev ); + ASSERT( !p_fdo->p_al_file_obj ); + + /* Get the AL device object. */ + HCA_PRINT( TRACE_LEVEL_INFORMATION ,HCA_DBG_PNP ,("Calling IoGetDeviceObjectPointer.\n")); + status = IoGetDeviceObjectPointer( pNotify->SymbolicLinkName, + FILE_ALL_ACCESS, &p_fdo->p_al_file_obj, &p_fdo->p_al_dev ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("IoGetDeviceObjectPointer returned %08x.\n", status )); + goto done; + } + + /* Register for removal notification of the IB Fabric root device. 
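If IBAL is later removed, __pnp_notify_target deregisters the HCA and drops the file-object reference.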
*/ + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_PNP, + ("Registering for target notifications.\n")); + status = IoRegisterPlugPlayNotification( + EventCategoryTargetDeviceChange, 0, p_fdo->p_al_file_obj, + p_dev_obj->DriverObject, __pnp_notify_target, p_dev_obj, + &p_fdo->pnp_target_entry ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("IoRegisterPlugPlayNotification returned %08x.\n", status)); + goto err_reg_notify; + } + + status = __hca_register( p_dev_obj ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("__get_ci_interface returned %08x.\n", status)); + goto err_reg_hca; + } + goto done; + +err_reg_hca: + IoUnregisterPlugPlayNotification( p_fdo->pnp_target_entry ); + p_fdo->pnp_target_entry = NULL; +err_reg_notify: + ObDereferenceObject( p_fdo->p_al_file_obj ); + p_fdo->p_al_file_obj = NULL; + p_fdo->p_al_dev = NULL; +done: + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + + +static void +__unmap_hca_memory( + IN PFDO_DEVICE_DATA const p_fdo ) +{ + struct pci_dev *pdev = p_fdo->bus_ib_ifc.pdev; + int i; + + HCA_ENTER( HCA_DBG_PNP ); + + for( i = 0; i < HCA_BAR_TYPE_MAX; i++ ) { + if (pdev->bar[i].virt) { + MmUnmapIoSpace( pdev->bar[i].virt, pdev->bar[i].size ); + cl_memclr( &pdev->bar[i], sizeof(hca_bar_t) ); + } + } + + HCA_EXIT( HCA_DBG_PNP ); +} + + +/* release the resources, allocated in hca_start */ +static void +__hca_release_resources( + IN DEVICE_OBJECT* const p_dev_obj ) +{ + PFDO_DEVICE_DATA p_fdo = (PFDO_DEVICE_DATA)p_dev_obj->DeviceExtension; + + HCA_ENTER( HCA_DBG_PNP ); + + switch( p_fdo->state ) + { + case HCA_REGISTERED: + __hca_deregister( p_fdo ); + + /* Fall through. */ + case HCA_STARTED: + /* dequeue HCA */ + mlnx_hca_remove( &p_fdo->hca ); + } + + if (p_fdo->al_sym_name.Buffer) { + ExFreePool( p_fdo->al_sym_name.Buffer ); + p_fdo->al_sym_name.Buffer = NULL; + } + + if( p_fdo->pnp_target_entry ) + { + ASSERT( p_fdo->pnp_ifc_entry ); + IoUnregisterPlugPlayNotification( p_fdo->pnp_target_entry ); + p_fdo->pnp_target_entry = NULL; + } + + if( p_fdo->pnp_ifc_entry ) { + IoUnregisterPlugPlayNotification( p_fdo->pnp_ifc_entry ); + p_fdo->pnp_ifc_entry = NULL; + } + + if( p_fdo->p_al_file_obj ) { + ObDereferenceObject( p_fdo->p_al_file_obj ); + p_fdo->p_al_file_obj = NULL; + } + + // release MLX4_BUS resources + if(p_fdo->bus_ib_ifc_taken) { + p_fdo->bus_ib_ifc_taken = FALSE; + __put_ifc( (PINTERFACE)&p_fdo->bus_ib_ifc ); + } + + // release PCI BUS resources + if(p_fdo->bus_pci_ifc_taken) { + p_fdo->bus_pci_ifc_taken = FALSE; + __put_ifc( (PINTERFACE)&p_fdo->bus_pci_ifc ); + } + + __unmap_hca_memory( p_fdo ); + + p_fdo->state = HCA_ADDED; + + HCA_EXIT( HCA_DBG_PNP ); +} + + +static void +hca_release_resources( + IN DEVICE_OBJECT* const p_dev_obj ) +{ + PFDO_DEVICE_DATA p_fdo; + POWER_STATE powerState; + + HCA_ENTER( HCA_DBG_PNP ); + + p_fdo = (PFDO_DEVICE_DATA)p_dev_obj->DeviceExtension; + + /* release all the resources, allocated in hca_start */ + __hca_release_resources(p_dev_obj); + + /* Notify the power manager that the device is powered down. */ + p_fdo->DevicePowerState = PowerDeviceD3; + powerState.DeviceState = PowerDeviceD3; + powerState = PoSetPowerState ( p_fdo->cl_ext.p_self_do, DevicePowerState, powerState ); + + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_PNP, + ("PoSetPowerState: old state %d, new state to %d\n", + powerState.DeviceState, p_fdo->DevicePowerState )); + + /* Clear the PnP state in case we get restarted. 
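Otherwise a stale PNP_DEVICE_FAILED flag set by a failed power-up would keep the restarted device marked as failed.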
*/ + p_fdo->pnpState = 0; + + HCA_EXIT( HCA_DBG_PNP ); +} + +static NTSTATUS +hca_start( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) +{ + int err; + NTSTATUS status; + PFDO_DEVICE_DATA p_fdo; + IO_STACK_LOCATION *pIoStack; + POWER_STATE powerState; + BUS_INTERFACE_STANDARD bus_pci_ifc; + + HCA_ENTER( HCA_DBG_PNP ); + + p_fdo = (PFDO_DEVICE_DATA)p_dev_obj->DeviceExtension; + + /* Handled on the way up. */ + status = cl_do_sync_pnp( p_dev_obj, p_irp, p_action ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Lower drivers failed IRP_MN_START_DEVICE (%#x).\n", status)); + goto end; + } + + pIoStack = IoGetCurrentIrpStackLocation( p_irp ); + + /* get PCI BUS interface */ + status = __get_ifc( p_dev_obj, &GUID_BUS_INTERFACE_STANDARD, + sizeof(BUS_INTERFACE_STANDARD), 1, NULL, (PINTERFACE)&bus_pci_ifc); + if( !NT_SUCCESS( status ) ) { + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_PNP, ("Getting PCI BUS interface failed: status=0x%x\n", status)); + goto end; + } + RtlCopyMemory( &p_fdo->bus_pci_ifc, &bus_pci_ifc, sizeof(BUS_INTERFACE_STANDARD) ); + p_fdo->bus_pci_ifc_taken = TRUE; + + /* get MLX4_BUS IB interface */ + status = __get_ifc( p_dev_obj, &MLX4_BUS_IB_INTERFACE_GUID, + sizeof(MLX4_BUS_IB_INTERFACE), 1, NULL, (PINTERFACE)&p_fdo->bus_ib_ifc); + if( !NT_SUCCESS( status ) ) { + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_PNP, ("Getting MLX4 BUS interface failed: status=0x%x\n", status)); + goto end; + } + p_fdo->bus_ib_ifc_taken = TRUE; + p_fdo->bus_ib_ifc.p_ibdev->x.p_fdo = p_fdo; + + + /* get node GUID */ + err = __get_dev_info( p_fdo, &p_fdo->hca.guid, &p_fdo->hca.hw_ver ); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW , + ("can't get guid - ib_query_device() failed (%08X)\n", err )); + //TODO: no cleanup on error + return STATUS_INSUFFICIENT_RESOURCES; + } + + /* queue HCA */ + mlnx_hca_insert( &p_fdo->hca ); + + /* + * Change the state since the PnP callback can happen + * before the callback returns. + */ + p_fdo->state = HCA_STARTED; + + /* Register for interface arrival of the IB_AL device. */ + status = IoRegisterPlugPlayNotification( + EventCategoryDeviceInterfaceChange, + PNPNOTIFY_DEVICE_INTERFACE_INCLUDE_EXISTING_INTERFACES, + (void*)&GUID_IB_CI_INTERFACE, p_dev_obj->DriverObject, + __pnp_notify_ifc, p_dev_obj, &p_fdo->pnp_ifc_entry ); + if( !NT_SUCCESS( status ) ) + { + p_fdo->state = HCA_ADDED; + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("IoRegisterPlugPlayNotification returned %08x.\n", status)); + } + + /* We get started fully powered. 
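Record D0 in the extension and report it with PoSetPowerState; the same bookkeeping is repeated whenever a SET_POWER IRP later changes the device state.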
*/ + p_fdo->DevicePowerState = PowerDeviceD0; + powerState.DeviceState = PowerDeviceD0; + powerState = PoSetPowerState ( p_fdo->cl_ext.p_self_do, DevicePowerState, powerState ); + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_PNP, + ("PoSetPowerState: old state %d, new state to %d\n", + powerState.DeviceState, p_fdo->DevicePowerState )); + +end: + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + + +static NTSTATUS +hca_query_removal_relations( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) +{ + NTSTATUS status; + PFDO_DEVICE_DATA p_fdo; + + HCA_ENTER( HCA_DBG_PNP ); + + p_fdo = (PFDO_DEVICE_DATA)p_dev_obj->DeviceExtension; + + if( p_fdo->state == HCA_REGISTERED ) + { + status = p_fdo->ci_ifc.get_relations( p_fdo->hca.guid, p_irp ); + if( !NT_SUCCESS( status ) ) + { + *p_action = IrpComplete; + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("AL get_relations returned %08x.\n", status)); + return status; + } + } + + *p_action = IrpPassDown; + HCA_EXIT( HCA_DBG_PNP ); + return STATUS_SUCCESS; +} + + +static NTSTATUS +hca_query_bus_relations( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) +{ + NTSTATUS status; + DEVICE_RELATIONS *p_rel; + PFDO_DEVICE_DATA p_fdo; + + HCA_ENTER( HCA_DBG_PNP ); + + p_fdo = p_dev_obj->DeviceExtension; + + //cl_event_wait_on( &p_fdo->mutex, EVENT_NO_TIMEOUT, FALSE ); + if( p_fdo->state == HCA_REGISTERED ) + { + status = p_fdo->ci_ifc.get_relations( p_fdo->hca.guid, p_irp ); + if( !NT_SUCCESS( status ) ) + { + //cl_event_signal( &p_fdo->mutex ); + *p_action = IrpComplete; + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("AL get_relations returned %08x.\n", status)); + return status; + } + } + else + { + status = cl_alloc_relations( p_irp, 1 ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("cl_alloc_relations returned %08x.\n", status)); + return status; + } + + p_rel = (DEVICE_RELATIONS*)p_irp->IoStatus.Information; + p_rel->Count = 0; + p_rel->Objects[0] = NULL; + } + + //cl_event_signal( &p_fdo->mutex ); + + *p_action = IrpPassDown; + HCA_EXIT( HCA_DBG_PNP ); + return STATUS_SUCCESS; +} + + +static NTSTATUS +hca_query_stop( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) +{ + /* All kernel clients will get notified through the device hierarchy. */ + + /* TODO: set a flag to fail creation of any new IB resources. */ + return cl_irp_skip( p_dev_obj, p_irp, p_action ); +} + + +static NTSTATUS +hca_stop( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) +{ + /* + * Must disable everything. Complib framework will + * call ReleaseResources handler. + */ + return cl_irp_skip( p_dev_obj, p_irp, p_action ); +} + + +static NTSTATUS +hca_cancel_stop( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) +{ + /* Handled on the way up. */ + return cl_do_sync_pnp( p_dev_obj, p_irp, p_action ); +} + + +static NTSTATUS +hca_query_remove( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) +{ + PFDO_DEVICE_DATA p_fdo = (PFDO_DEVICE_DATA)p_dev_obj->DeviceExtension; + if (atomic_read(&p_fdo->usecnt)) { + DbgPrint( "MTHCA: Can't get unloaded. 
%d applications are still in work\n", p_fdo->usecnt); + p_irp->IoStatus.Status = STATUS_UNSUCCESSFUL; + return cl_irp_complete( p_dev_obj, p_irp, p_action ); + } + /* TODO: set a flag to fail creation of any new IB resources. */ + return cl_irp_skip( p_dev_obj, p_irp, p_action ); +} + + +static NTSTATUS +hca_cancel_remove( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) +{ + /* Handled on the way up. */ + return cl_do_sync_pnp( p_dev_obj, p_irp, p_action ); +} + + +static NTSTATUS +hca_surprise_remove( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) +{ + /* + * TODO: Set state so that all further requests + * automatically succeed/fail as needed. + */ + return cl_irp_skip( p_dev_obj, p_irp, p_action ); +} + + +static NTSTATUS +hca_query_capabilities( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) +{ + NTSTATUS status; + PFDO_DEVICE_DATA p_fdo; + IO_STACK_LOCATION *pIoStack; + DEVICE_CAPABILITIES *pCaps; + + HCA_ENTER( HCA_DBG_PNP ); + + p_fdo = (PFDO_DEVICE_DATA)p_dev_obj->DeviceExtension; + + /* Process on the way up. */ + status = cl_do_sync_pnp( p_dev_obj, p_irp, p_action ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("cl_do_sync_pnp returned %08X.\n", status)); + return status; + } + + pIoStack = IoGetCurrentIrpStackLocation( p_irp ); + pCaps = pIoStack->Parameters.DeviceCapabilities.Capabilities; + + /* + * Store the device power mapping into our extension since we're + * the power policy owner. The mapping is used when handling + * IRP_MN_SET_POWER IRPs. + */ + cl_memcpy( + p_fdo->DevicePower, pCaps->DeviceState, sizeof(p_fdo->DevicePower) ); + + if( pCaps->DeviceD1 ) + { + HCA_PRINT( TRACE_LEVEL_WARNING ,HCA_DBG_PNP, + ("WARNING: Device reports support for DeviceD1 power state.\n")); + pCaps->DeviceD1 = FALSE; + } + + if( pCaps->DeviceD2 ) + { + HCA_PRINT( TRACE_LEVEL_WARNING,HCA_DBG_PNP, + ("WARNING: Device reports support for DeviceD2 power state.\n")); + pCaps->DeviceD2 = FALSE; + } + + if( pCaps->SystemWake != PowerSystemUnspecified ) + { + HCA_PRINT( TRACE_LEVEL_WARNING ,HCA_DBG_PNP, + ("WARNING: Device reports support for system wake.\n")); + pCaps->SystemWake = PowerSystemUnspecified; + } + + if( pCaps->DeviceWake != PowerDeviceUnspecified ) + { + HCA_PRINT( TRACE_LEVEL_WARNING, HCA_DBG_PNP, + ("WARNING: Device reports support for device wake.\n")); + pCaps->DeviceWake = PowerDeviceUnspecified; + } + + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + + +static NTSTATUS +hca_query_pnp_state( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) +{ + PFDO_DEVICE_DATA p_fdo; + + HCA_ENTER( HCA_DBG_PNP ); + + p_fdo = (PFDO_DEVICE_DATA)p_dev_obj->DeviceExtension; + + p_irp->IoStatus.Information |= p_fdo->pnpState; + + *p_action = IrpSkip; + + HCA_EXIT( HCA_DBG_PNP ); + return STATUS_SUCCESS;; +} + +static NTSTATUS +hca_query_power( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) +{ + NTSTATUS status = STATUS_SUCCESS; + IO_STACK_LOCATION *pIoStack; + + HCA_ENTER(HCA_DBG_PO); + + UNUSED_PARAM( p_dev_obj ); + + pIoStack = IoGetCurrentIrpStackLocation( p_irp ); + + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_PO, + ("QUERY_POWER for FDO %p: type %s, state %d, action %d, IRQL %d, IRP %p\n", + p_dev_obj, + (pIoStack->Parameters.Power.Type) ? 
"DevicePowerState" : "SystemPowerState", + pIoStack->Parameters.Power.State.DeviceState, + pIoStack->Parameters.Power.ShutdownType, KeGetCurrentIrql(), p_irp )); + + switch( pIoStack->Parameters.Power.Type ) + { + case SystemPowerState: + /* Fail any requests to hibernate or sleep the system. */ + switch( pIoStack->Parameters.Power.State.SystemState ) + { + case PowerSystemSleeping1: // STANDBY support + case PowerSystemHibernate: + { + PFDO_DEVICE_DATA p_fdo = (PFDO_DEVICE_DATA)p_dev_obj->DeviceExtension; + if (atomic_read(&p_fdo->usecnt)) + status = STATUS_UNSUCCESSFUL; + break; + } + + case PowerSystemWorking: + case PowerSystemShutdown: + break; + + default: + status = STATUS_NOT_SUPPORTED; + } + break; + + case DevicePowerState: + /* Fail any query for low power states. */ + switch( pIoStack->Parameters.Power.State.DeviceState ) + { + case PowerDeviceD0: + case PowerDeviceD3: + /* We only support fully powered or off power states. */ + break; + + default: + status = STATUS_NOT_SUPPORTED; + } + break; + } + + if( status == STATUS_SUCCESS ) + *p_action = IrpSkip; + else + *p_action = IrpComplete; + + HCA_EXIT( HCA_DBG_PO ); + return status; +} + + +static void +__RequestPowerCompletion( + IN DEVICE_OBJECT *p_dev_obj, + IN UCHAR minorFunction, + IN POWER_STATE powerState, + IN void *context, + IN IO_STATUS_BLOCK *pIoStatus ) +{ + IRP *p_irp; + cl_pnp_po_ext_t *p_fdo; + + HCA_ENTER( HCA_DBG_PO ); + + UNUSED_PARAM( minorFunction ); + UNUSED_PARAM( powerState ); + + p_irp = (IRP*)context; + p_fdo = (cl_pnp_po_ext_t*)p_dev_obj->DeviceExtension; + + /* Propagate the device IRP status to the system IRP status. */ + p_irp->IoStatus.Status = pIoStatus->Status; + + /* Continue Power IRP processing. */ + PoStartNextPowerIrp( p_irp ); + IoCompleteRequest( p_irp, IO_NO_INCREMENT ); + IoReleaseRemoveLock( &p_fdo->remove_lock, p_irp ); + HCA_EXIT( HCA_DBG_PO ); +} + + +/*NOTE: Completion routines must NEVER be pageable. */ +static NTSTATUS +__SystemPowerCompletion( + IN DEVICE_OBJECT *p_dev_obj, + IN IRP *p_irp, + IN void *context ) +{ + NTSTATUS status; + POWER_STATE state; + PFDO_DEVICE_DATA p_fdo; + IO_STACK_LOCATION *pIoStack; + + HCA_ENTER( HCA_DBG_PO ); + + UNUSED_PARAM( context ); + + p_fdo = (PFDO_DEVICE_DATA)p_dev_obj->DeviceExtension; + pIoStack = IoGetCurrentIrpStackLocation( p_irp ); + + if( !NT_SUCCESS( p_irp->IoStatus.Status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PO, + ("IRP_MN_SET_POWER for system failed by lower driver with %08x.\n", + p_irp->IoStatus.Status)); + status = STATUS_SUCCESS; + PoStartNextPowerIrp( p_irp ); + goto release; + } + + state.DeviceState = + p_fdo->DevicePower[pIoStack->Parameters.Power.State.SystemState]; + + /* + * Send a device power IRP to our devnode. Using our device object will + * only work on win2k and other NT based systems. + */ + status = PoRequestPowerIrp( p_dev_obj, IRP_MN_SET_POWER, state, + __RequestPowerCompletion, p_irp, NULL ); + + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_PO, + ("PoRequestPowerIrp: SET_POWER 'PowerDeviceD%d', status %#x\n", + state.DeviceState - 1, status )); + + if( status != STATUS_PENDING ) { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PO, + ("PoRequestPowerIrp returned %08x.\n", status)); + p_irp->IoStatus.Status = status; /* Propagate the failure. 
*/ + PoStartNextPowerIrp( p_irp ); + IoCompleteRequest( p_irp, IO_NO_INCREMENT ); + goto release; + } + + status = STATUS_MORE_PROCESSING_REQUIRED; + goto exit; + +release: + IoReleaseRemoveLock( &p_fdo->cl_ext.remove_lock, p_irp ); +exit: + HCA_EXIT( HCA_DBG_PO ); + return status; +} + + +/* Work item callback to handle DevicePowerD0 IRPs at passive level. */ +static void +__DevicePowerUpCompletionWorkItem( + IN DEVICE_OBJECT* p_dev_obj, + IN void* context ) +{ + NTSTATUS status; + IO_STACK_LOCATION *pIoStack; + PFDO_DEVICE_DATA p_fdo; + IRP *p_irp; + POWER_STATE powerState; + + HCA_ENTER( HCA_DBG_PO ); + + p_fdo = (PFDO_DEVICE_DATA)p_dev_obj->DeviceExtension; + p_irp = (IRP*)context; + pIoStack = IoGetCurrentIrpStackLocation( p_irp ); + + IoFreeWorkItem( p_fdo->pPoWorkItem ); + p_fdo->pPoWorkItem = NULL; + + /* restart the HCA */ + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_PO, + ("***** Restart the HCA, IRQL %d\n", KeGetCurrentIrql())); + + if( p_fdo->p_al_dev ) { + status = __hca_register( p_dev_obj ); + if( !NT_SUCCESS( status ) ) { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PO, + ("!!! __hca_register failed (%#x) \n", status)); + goto err_hca_reg; + } + } + + p_fdo->DevicePowerState = pIoStack->Parameters.Power.State.DeviceState; + powerState = PoSetPowerState( p_dev_obj, DevicePowerState, + pIoStack->Parameters.Power.State ); + + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_PO, + ("PoSetPowerState: old state %d, new state to %d\n", + powerState.DeviceState, p_fdo->DevicePowerState )); + + goto exit; + +err_hca_reg: + /* Flag device as having failed. */ + p_fdo->pnpState |= PNP_DEVICE_FAILED; + IoInvalidateDeviceState( p_fdo->cl_ext.p_pdo ); +exit: + PoStartNextPowerIrp( p_irp ); + IoCompleteRequest( p_irp, IO_NO_INCREMENT ); + IoReleaseRemoveLock( &p_fdo->cl_ext.remove_lock, p_irp ); + HCA_EXIT( HCA_DBG_PO ); +} + +/*NOTE: Completion routines must NEVER be pageable. */ +static NTSTATUS +__DevicePowerUpCompletion( + IN DEVICE_OBJECT *p_dev_obj, + IN IRP *p_irp, + IN void *context ) +{ + NTSTATUS status = STATUS_SUCCESS; + PFDO_DEVICE_DATA p_fdo; + IO_STACK_LOCATION *pIoStack; + + HCA_ENTER( HCA_DBG_PO ); + + UNUSED_PARAM( context ); + + p_fdo = (PFDO_DEVICE_DATA)p_dev_obj->DeviceExtension; + pIoStack = IoGetCurrentIrpStackLocation( p_irp ); + + if( !NT_SUCCESS( p_irp->IoStatus.Status ) ) { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PO, + ("IRP_MN_SET_POWER for device failed by lower driver with %08x.\n", + p_irp->IoStatus.Status)); + status = STATUS_SUCCESS; + PoStartNextPowerIrp( p_irp ); + goto release; + } + + /* Process in a work item - mthca_start blocks. */ + ASSERT( !p_fdo->pPoWorkItem ); + p_fdo->pPoWorkItem = IoAllocateWorkItem( p_dev_obj ); + if( !p_fdo->pPoWorkItem ) { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PO, + ("Failed to allocate work item.\n" )); + status = STATUS_SUCCESS; + p_fdo->pnpState |= PNP_DEVICE_FAILED; + IoInvalidateDeviceState( p_fdo->cl_ext.p_pdo ); + PoStartNextPowerIrp( p_irp ); + goto release; + } + + /* Process in work item callback. 
*/ + IoMarkIrpPending( p_irp ); + IoQueueWorkItem( p_fdo->pPoWorkItem, + __DevicePowerUpCompletionWorkItem, DelayedWorkQueue, p_irp ); + status = STATUS_MORE_PROCESSING_REQUIRED; + goto exit; + +release: + IoReleaseRemoveLock( &p_fdo->cl_ext.remove_lock, p_irp ); +exit: + HCA_EXIT( HCA_DBG_PO ); + return status; +} + +static NTSTATUS __DevicePowerDownWorkItemCompletion( + IN DEVICE_OBJECT *p_dev_obj, + IN IRP *p_irp, + IN void *context ) +{ + PFDO_DEVICE_DATA p_fdo = (PFDO_DEVICE_DATA)p_dev_obj->DeviceExtension; + UNUSED_PARAM( context ); + + HCA_ENTER( HCA_DBG_PO ); + + PoStartNextPowerIrp( p_irp ); + IoReleaseRemoveLock( &p_fdo->cl_ext.remove_lock, p_irp ); + + HCA_EXIT( HCA_DBG_PO ); + return STATUS_SUCCESS; +} + +/* Work item callback to handle DevicePowerD3 IRPs at passive level. */ +static void +__DevicePowerDownWorkItem( + IN DEVICE_OBJECT* p_dev_obj, + IN void* context ) +{ + IO_STACK_LOCATION *pIoStack; + PFDO_DEVICE_DATA p_fdo; + IRP *p_irp; + POWER_STATE powerState; + + HCA_ENTER( HCA_DBG_PO ); + + p_fdo = (PFDO_DEVICE_DATA)p_dev_obj->DeviceExtension; + p_irp = (IRP*)context; + pIoStack = IoGetCurrentIrpStackLocation( p_irp ); + + IoFreeWorkItem( p_fdo->pPoWorkItem ); + p_fdo->pPoWorkItem = NULL; + + p_fdo->DevicePowerState = pIoStack->Parameters.Power.State.DeviceState; + powerState = PoSetPowerState( p_dev_obj, DevicePowerState, + pIoStack->Parameters.Power.State ); + + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_PO, + ("PoSetPowerState: old state %d, new state to %d, IRQL %d\n", + powerState.DeviceState, p_fdo->DevicePowerState, KeGetCurrentIrql() )); + + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_PO, + ("***** Remove the HCA \n")); + + { + __hca_deregister( p_fdo ); + } + + IoCopyCurrentIrpStackLocationToNext( p_irp ); +#pragma warning( push, 3 ) + IoSetCompletionRoutine( p_irp, __DevicePowerDownWorkItemCompletion, + NULL, TRUE, TRUE, TRUE ); +#pragma warning( pop ) + PoCallDriver( p_fdo->cl_ext.p_next_do, p_irp ); + + HCA_EXIT( HCA_DBG_PO ); +} + + +static NTSTATUS +hca_set_power( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) +{ + NTSTATUS status; + IO_STACK_LOCATION *pIoStack; + PFDO_DEVICE_DATA p_fdo; + + HCA_ENTER( HCA_DBG_PO ); + + p_fdo = (PFDO_DEVICE_DATA)p_dev_obj->DeviceExtension; + pIoStack = IoGetCurrentIrpStackLocation( p_irp ); + + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_PO, + ("SET_POWER for FDO %p (ext %p): type %s, state %d, action %d, IRQL %d \n", + p_dev_obj, p_fdo, + (pIoStack->Parameters.Power.Type) ? "DevicePowerState" : "SystemPowerState", + pIoStack->Parameters.Power.State.DeviceState, + pIoStack->Parameters.Power.ShutdownType, KeGetCurrentIrql() )); + + switch( pIoStack->Parameters.Power.Type ) + { + case SystemPowerState: + p_fdo->SystemPowerState = pIoStack->Parameters.Power.State.SystemState; + + /* + * Process on the way up the stack. We cannot block since the + * power dispatch function can be called at elevated IRQL if the + * device is in a paging/hibernation/crash dump path. 
+ */ + IoMarkIrpPending( p_irp ); + IoCopyCurrentIrpStackLocationToNext( p_irp ); +#pragma warning( push, 3 ) + IoSetCompletionRoutine( p_irp, __SystemPowerCompletion, NULL, + TRUE, TRUE, TRUE ); +#pragma warning( pop ) + PoCallDriver( p_fdo->cl_ext.p_next_do, p_irp ); + + *p_action = IrpDoNothing; + status = STATUS_PENDING; + break; + + case DevicePowerState: + IoMarkIrpPending( p_irp ); + if( pIoStack->Parameters.Power.State.DeviceState == PowerDeviceD0 && + p_fdo->SystemPowerState == PowerSystemWorking) + { /* power up */ + /* If we're already powered up, just pass down. */ + if( p_fdo->DevicePowerState == PowerDeviceD0 ) + { + status = STATUS_SUCCESS; + *p_action = IrpIgnore; + break; + } + + /* Process in I/O completion callback. */ + IoCopyCurrentIrpStackLocationToNext( p_irp ); +#pragma warning( push, 3 ) + IoSetCompletionRoutine( p_irp, __DevicePowerUpCompletion, NULL, + TRUE, TRUE, TRUE ); +#pragma warning( pop ) + PoCallDriver( p_fdo->cl_ext.p_next_do, p_irp ); + } + else + { /* power down */ + + /* Process in a work item - deregister_ca and HcaDeinit block. */ + ASSERT( !p_fdo->pPoWorkItem ); + p_fdo->pPoWorkItem = IoAllocateWorkItem( p_dev_obj ); + if( !p_fdo->pPoWorkItem ) + { + status = STATUS_INSUFFICIENT_RESOURCES; + break; + } + + /* Process in work item callback. */ + IoQueueWorkItem( + p_fdo->pPoWorkItem, __DevicePowerDownWorkItem, DelayedWorkQueue, p_irp ); + } + *p_action = IrpDoNothing; + status = STATUS_PENDING; + break; + + default: + /* Pass down and let the PDO driver handle it. */ + *p_action = IrpIgnore; + status = STATUS_SUCCESS; + break; + } + + if( !NT_SUCCESS( status ) ) + *p_action = IrpComplete; + + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + +void +hca_init_vfptr( void ) +{ + vfptrHcaPnp.identity = "MLX4_HCA driver"; + vfptrHcaPnp.pfn_start = hca_start; + vfptrHcaPnp.pfn_query_stop = hca_query_stop; + vfptrHcaPnp.pfn_stop = hca_stop; + vfptrHcaPnp.pfn_cancel_stop = hca_cancel_stop; + vfptrHcaPnp.pfn_query_remove = hca_query_remove; + vfptrHcaPnp.pfn_release_resources = hca_release_resources; + vfptrHcaPnp.pfn_remove = cl_do_remove; + vfptrHcaPnp.pfn_cancel_remove = hca_cancel_remove; + vfptrHcaPnp.pfn_surprise_remove = hca_surprise_remove; + vfptrHcaPnp.pfn_query_capabilities = hca_query_capabilities; + vfptrHcaPnp.pfn_query_pnp_state = hca_query_pnp_state; + vfptrHcaPnp.pfn_filter_res_req = cl_irp_skip; + vfptrHcaPnp.pfn_dev_usage_notification = cl_do_sync_pnp; + vfptrHcaPnp.pfn_query_bus_relations = hca_query_bus_relations; + vfptrHcaPnp.pfn_query_ejection_relations = cl_irp_ignore; + vfptrHcaPnp.pfn_query_removal_relations = hca_query_removal_relations; + vfptrHcaPnp.pfn_query_target_relations = cl_irp_ignore; + vfptrHcaPnp.pfn_unknown = cl_irp_ignore; + vfptrHcaPnp.pfn_query_resources = cl_irp_ignore; + vfptrHcaPnp.pfn_query_res_req = cl_irp_ignore; + vfptrHcaPnp.pfn_query_bus_info = cl_irp_ignore; + vfptrHcaPnp.pfn_query_interface = cl_irp_ignore; + vfptrHcaPnp.pfn_read_config = cl_irp_ignore; + vfptrHcaPnp.pfn_write_config = cl_irp_ignore; + vfptrHcaPnp.pfn_eject = cl_irp_ignore; + vfptrHcaPnp.pfn_set_lock = cl_irp_ignore; + vfptrHcaPnp.pfn_query_power = hca_query_power; + vfptrHcaPnp.pfn_set_power = hca_set_power; + vfptrHcaPnp.pfn_power_sequence = cl_irp_ignore; + vfptrHcaPnp.pfn_wait_wake = cl_irp_ignore; +} + +static NTSTATUS +__read_registry( + IN UNICODE_STRING* const p_registry_path ) +{ + NTSTATUS status; + /* Remember the terminating entry in the table below. 
*/ + RTL_QUERY_REGISTRY_TABLE table[3]; + + HCA_ENTER( HCA_DBG_PNP ); + + RtlInitUnicodeString( &g_param_path, NULL ); + g_param_path.MaximumLength = p_registry_path->Length + + sizeof(L"\\Parameters"); + g_param_path.Buffer = cl_zalloc( g_param_path.MaximumLength ); + if( !g_param_path.Buffer ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Failed to allocate parameters path buffer.\n")); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlAppendUnicodeStringToString( &g_param_path, p_registry_path ); + RtlAppendUnicodeToString( &g_param_path, L"\\Parameters" ); + + /* + * Clear the table. This clears all the query callback pointers, + * and sets up the terminating table entry. + */ + cl_memclr( table, sizeof(table) ); + + /* Setup the table entries. */ + table[0].Flags = RTL_QUERY_REGISTRY_DIRECT; + table[0].Name = L"DebugLevel"; + table[0].EntryContext = &g.DebugPrintLevel; + table[0].DefaultType = REG_DWORD; + table[0].DefaultData = &g.DebugPrintLevel; + table[0].DefaultLength = sizeof(ULONG); + + + table[1].Flags = RTL_QUERY_REGISTRY_DIRECT; + table[1].Name = L"DebugFlags"; + table[1].EntryContext = &g.DebugPrintFlags; + table[1].DefaultType = REG_DWORD; + table[1].DefaultData = &g.DebugPrintFlags; + table[1].DefaultLength = sizeof(ULONG); + + + /* Have at it! */ + status = RtlQueryRegistryValues( RTL_REGISTRY_ABSOLUTE, + g_param_path.Buffer, table, NULL, NULL ); + + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + +static void +hca_drv_unload( + IN PDRIVER_OBJECT p_driver_obj ) +{ + HCA_ENTER( HCA_DBG_PNP ); + + UNUSED_PARAM( p_driver_obj ); + + cl_free( g_param_path.Buffer ); + + HCA_EXIT( HCA_DBG_PNP ); +#if defined(EVENT_TRACING) + WPP_CLEANUP(p_driver_obj); +#endif + +} + +static NTSTATUS +hca_sysctl( + IN PDEVICE_OBJECT p_dev_obj, + IN PIRP p_irp ) +{ + NTSTATUS status; + PFDO_DEVICE_DATA p_fdo; + + HCA_ENTER( HCA_DBG_PNP ); + + p_fdo = p_dev_obj->DeviceExtension; + + IoSkipCurrentIrpStackLocation( p_irp ); + status = IoCallDriver( p_fdo->cl_ext.p_next_do, p_irp ); + + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + + +NTSTATUS +DriverEntry( + IN PDRIVER_OBJECT p_driver_obj, + IN PUNICODE_STRING p_registry_path ) +{ + NTSTATUS status; +#if defined(EVENT_TRACING) + WPP_INIT_TRACING(p_driver_obj ,p_registry_path); +#endif + // global initializations + g.DebugPrintLevel = TRACE_LEVEL_VERBOSE; + g.DebugPrintFlags = 0xffff; + + HCA_ENTER( HCA_DBG_PNP ); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_PNP, + ("Built %s %s, Version %s, RelDate %s\n", + __DATE__, __TIME__, DRV_VERSION, DRV_RELDATE)); + + status = mlnx_hcas_init(); + if( status != STATUS_SUCCESS ) { + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_PNP , + ("mlnx_hcas_init returned %#x.\n", status)); + goto end; + } + + status = __read_registry( p_registry_path ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("__read_registry returned 0x%X.\n", status)); + return status; + } + + /*leo: init function table */ + hca_init_vfptr(); + + p_driver_obj->MajorFunction[IRP_MJ_PNP] = cl_pnp; + p_driver_obj->MajorFunction[IRP_MJ_POWER] = cl_power; + p_driver_obj->MajorFunction[IRP_MJ_SYSTEM_CONTROL] = hca_sysctl; + p_driver_obj->DriverUnload = hca_drv_unload; + p_driver_obj->DriverExtension->AddDevice = hca_add_device; + +end: + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + +#endif diff --git a/branches/ConnectX/hw/mlx4/kernel/hca/drv.h b/branches/ConnectX/hw/mlx4/kernel/hca/drv.h new file mode 100644 index 00000000..913e7fb0 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/hca/drv.h @@ 
-0,0 +1,370 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: al.c 1611 2006-08-20 14:48:55Z sleybo $ + */ + + +#pragma once + +//////////////////////////////////////////////////////// +// +// INCLUDES +// +//////////////////////////////////////////////////////// + +#ifdef USE_WDM_FRAMEWORK +#include +#endif +#include +#include "data.h" +#include "debug.h" +#include "bus_intf.h" + + +//////////////////////////////////////////////////////// +// +// MACROS +// +//////////////////////////////////////////////////////// + +#if !defined(FILE_DEVICE_INFINIBAND) // Not defined in WXP DDK +#define FILE_DEVICE_INFINIBAND 0x0000003B +#endif + +#define BUSENUM_POOL_TAG (ULONG) 'suBT' + +#define HCARESOURCENAME L"MofResourceName" + +#ifndef min +#define min(_a, _b) (((_a) < (_b)) ? (_a) : (_b)) +#endif + +#ifndef max +#define max(_a, _b) (((_a) > (_b)) ? (_a) : (_b)) +#endif + + +//////////////////////////////////////////////////////// +// +// TYPES +// +//////////////////////////////////////////////////////// + +/****s* HCA/hca_reg_state_t +* NAME +* hca_reg_state_t +* +* DESCRIPTION +* State for tracking registration with AL. This state is independent of the +* device PnP state, and both are used to properly register with AL. +* +* SYNOPSIS +*/ +typedef enum _hca_reg_state +{ + HCA_SHUTDOWN, + HCA_ADDED, + HCA_STARTED, + HCA_IFC_DEREFERENCED, + HCA_REGISTERED + +} hca_reg_state_t; +/* +* VALUES +* HCA_SHUTDOWN +* Cleaning up. +* +* HCA_ADDED +* AddDevice was called and successfully registered for interface +* notifications. +* +* HCA_STARTED +* IRP_MN_START_DEVICE was called. The HCA is fully functional. +* +* HCA_IFC_DEREFERENCED +* DEVICE_QUERY_REMOVE for IBBUS was received. +* +* HCA_REGISTERED +* Fully functional and registered with the bus root. +*********/ + +// +// Structure for reporting data to WMI +// + +typedef struct _HCA_WMI_STD_DATA { + UINT32 DebugPrintLevel; + UINT32 DebugPrintFlags; + +} HCA_WMI_STD_DATA, * PHCA_WMI_STD_DATA; + + +#pragma warning(disable:4201) // nameless struct/union +typedef struct _GLOBALS { + HCA_WMI_STD_DATA; +} GLOBALS; +#pragma warning(default:4201) // nameless struct/union + +extern GLOBALS g; + +// +// The device extension of the bus itself. From whence the PDO's are born. 
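+// (Each mlx4_hca FDO keeps one FDO_DEVICE_DATA instance in its DeviceExtension; the PnP and power dispatch handlers in drv.c cast p_dev_obj->DeviceExtension back to PFDO_DEVICE_DATA.)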
+// + +typedef struct _FDO_DEVICE_DATA +{ +#ifdef USE_WDM_FRAMEWORK + /* ------------------------------------------------- + * PNP and POWER MANAGER DATA + * ------------------------------------------------ */ + cl_pnp_po_ext_t cl_ext; /* COMPLIB PnP object */ + /* Cache of the system to device power states. */ + DEVICE_POWER_STATE DevicePower[PowerSystemMaximum]; + DEVICE_POWER_STATE DevicePowerState; + SYSTEM_POWER_STATE SystemPowerState; + PIO_WORKITEM pPoWorkItem; + +#else + + /* ------------------------------------------------- + * WDF DATA + * ------------------------------------------------ */ + WDFDEVICE FdoDevice; +#endif + + HCA_WMI_STD_DATA WmiData; + + /* ------------------------------------------------- + * PNP DATA + * ------------------------------------------------ */ + void * pnp_ifc_entry; /* Notification entry for PnP interface events. */ + void * pnp_target_entry; /* Notification entry for PnP target events. */ + PNP_DEVICE_STATE pnpState; /* state for PnP Manager */ + + /* ------------------------------------------------- + * IBAL DATA + * ------------------------------------------------ */ + ib_ci_ifc_t ci_ifc; /* Interface for the lower edge of the IB_AL device. */ + hca_reg_state_t state; /* State for tracking registration with AL */ + DEVICE_OBJECT * p_al_dev; /* IB_AL FDO */ + FILE_OBJECT * p_al_file_obj; /* IB_AL file object */ + UNICODE_STRING al_sym_name; /* IB_AL symbolic name */ + + /* ------------------------------------------------- + * SHIM DATA + * ------------------------------------------------ */ + mlnx_hca_t hca; + atomic32_t usecnt; /* the number of working applications*/ + spinlock_t uctx_lock; // spinlock for the below chain + cl_qlist_t uctx_list; // chain of user contexts + int bus_pci_ifc_taken; + BUS_INTERFACE_STANDARD bus_pci_ifc; /* PCI bus interface */ + + /* ------------------------------------------------- + * LOW LEVEL DRIVER' DATA + * ------------------------------------------------ */ + int bus_ib_ifc_taken; + MLX4_BUS_IB_INTERFACE bus_ib_ifc; + + +} FDO_DEVICE_DATA, *PFDO_DEVICE_DATA; + +#ifndef USE_WDM_FRAMEWORK + +WDF_DECLARE_CONTEXT_TYPE_WITH_NAME(FDO_DEVICE_DATA, FdoGetData) + +typedef struct _QUEUE_DATA +{ + PFDO_DEVICE_DATA FdoData; + +} QUEUE_DATA, *PQUEUE_DATA; + +WDF_DECLARE_CONTEXT_TYPE_WITH_NAME(QUEUE_DATA, QueueGetData) + +#endif + +//////////////////////////////////////////////////////// +// +// FUNCTIONS +// +//////////////////////////////////////////////////////// + +static inline PFDO_DEVICE_DATA hca2fdo(mlnx_hca_t *p_hca) +{ + return CONTAINING_RECORD(p_hca, FDO_DEVICE_DATA, hca); +} + +static inline struct ib_device *hca2ibdev(mlnx_hca_t *p_hca) +{ + return (hca2fdo(p_hca))->bus_ib_ifc.p_ibdev; +} + +static inline mlnx_hca_t *ibdev2hca(struct ib_device *p_ibdev) +{ + return &p_ibdev->x.p_fdo->hca; +} + +static inline struct pci_dev *hca2pdev(mlnx_hca_t *p_hca) +{ + return (hca2fdo(p_hca))->bus_ib_ifc.pdev; +} + +static inline struct mlx4_dev *hca2mdev(mlnx_hca_t *p_hca) +{ + return (hca2fdo(p_hca))->bus_ib_ifc.pdev->dev; +} + +static inline boolean_t hca_is_livefish(PFDO_DEVICE_DATA p_fdo) +{ + return p_fdo->bus_ib_ifc.is_livefish; +} + +static inline ib_api_status_t errno_to_iberr(int err) +{ +#define MAP_NT_ERR(err,ibstatus) case err: ib_status = ibstatus; break + ib_api_status_t ib_status; + + if (!err) + return IB_SUCCESS; + + if (err < 0) + err = -err; + switch (err) { + MAP_NT_ERR( ENOENT, IB_NOT_FOUND ); + MAP_NT_ERR( EINTR, IB_INTERRUPTED ); + MAP_NT_ERR( EAGAIN, IB_RESOURCE_BUSY ); + MAP_NT_ERR( ENOMEM, 
IB_INSUFFICIENT_MEMORY ); + MAP_NT_ERR( EACCES, IB_INVALID_PERMISSION ); + MAP_NT_ERR( EFAULT, IB_ERROR ); + MAP_NT_ERR( EBUSY, IB_RESOURCE_BUSY ); + MAP_NT_ERR( ENODEV, IB_UNSUPPORTED ); + MAP_NT_ERR( EINVAL, IB_INVALID_PARAMETER ); + MAP_NT_ERR( ENOSYS, IB_UNSUPPORTED ); + MAP_NT_ERR( ERANGE, IB_INVALID_SETTING ); + default: + //HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_SHIM, + // "Unmapped errno (%d)\n", err); + ib_status = IB_UNKNOWN_ERROR; + break; + } + return ib_status; +} + +static inline int start_port(struct ib_device *device) +{ + return device->node_type == RDMA_NODE_IB_SWITCH ? 0 : 1; +} + +static inline int end_port(struct ib_device *device) +{ + return device->node_type == RDMA_NODE_IB_SWITCH ? 0 : device->phys_port_cnt; +} + +#ifndef USE_WDM_FRAMEWORK + +//////////////////////////////////////////////////////// +// +// PROTOTYPES +// +//////////////////////////////////////////////////////// + +DRIVER_INITIALIZE DriverEntry; + +void +EvtDriverUnload( + IN WDFDRIVER Driver + ); + +NTSTATUS +EvtDeviceAdd( + IN WDFDRIVER Driver, + IN PWDFDEVICE_INIT Device + ); + +NTSTATUS +EvtDeviceD0Entry( + IN WDFDEVICE Device, + IN WDF_POWER_DEVICE_STATE PreviousState + ); + +NTSTATUS +EvtDeviceD0Exit( + IN WDFDEVICE Device, + IN WDF_POWER_DEVICE_STATE TargetState + ); + +NTSTATUS +EvtPrepareHardware( + IN WDFDEVICE Device, + IN WDFCMRESLIST ResourcesRaw, + IN WDFCMRESLIST ResourcesTranslated + ); + +NTSTATUS +EvtReleaseHardware( + IN WDFDEVICE Device, + IN WDFCMRESLIST ResourcesTranslated + ); + + +// +// Implemented in wmi.c +// + +NTSTATUS +WmiRegistration( + WDFDEVICE Device +); + +NTSTATUS +EvtStdDataSetItem( + IN WDFWMIINSTANCE WmiInstance, + IN ULONG DataItemId, + IN ULONG InBufferSize, + IN PVOID InBuffer + ); + +NTSTATUS +EvtStdDataSetInstance( + IN WDFWMIINSTANCE WmiInstance, + IN ULONG InBufferSize, + IN PVOID InBuffer + ); + +NTSTATUS +EvtStdDataQueryInstance( + IN WDFWMIINSTANCE WmiInstance, + IN ULONG OutBufferSize, + IN PVOID OutBuffer, + OUT PULONG BufferUsed + ); + +#endif + diff --git a/branches/ConnectX/hw/mlx4/kernel/hca/fw.c b/branches/ConnectX/hw/mlx4/kernel/hca/fw.c new file mode 100644 index 00000000..cac46df2 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/hca/fw.c @@ -0,0 +1,476 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: al.c 1611 2006-08-20 14:48:55Z sleybo $ + */ + +#include "precomp.h" + +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "vp.tmh" +#endif + + +/*********************************** +Firmware Update definitions +***********************************/ +#define PCI_CONF_ADDR (0x00000058) +#define PCI_CONF_DATA (0x0000005c) +#define FLASH_OFFSET (0x000f01a4) +#define READ_BIT (1<<29) +#define WRITE_BIT (2<<29) +#define ADDR_MSK (0x0007ffff) +#define CMD_MASK (0xe0000000) +#define BANK_SHIFT (19) +#define BANK_MASK (0xfff80000) +#define MAX_FLASH_SIZE (0x80000) // 512K + +#define SEMAP63 (0xf03fc) +#define GPIO_DIR_L (0xf008c) +#define GPIO_POL_L (0xf0094) +#define GPIO_MOD_L (0xf009c) +#define GPIO_DAT_L (0xf0084) +#define GPIO_DATACLEAR_L (0xf00d4) +#define GPIO_DATASET_L (0xf00dc) + +#define CPUMODE (0xf0150) +#define CPUMODE_MSK (0xc0000000UL) +#define CPUMODE_SHIFT (30) + +/* Definitions intended to become shared with UM. Later... */ +#define FW_READ 0x00 +#define FW_WRITE 0x01 +#define FW_READ_CMD 0x08 +#define FW_WRITE_CMD 0x09 +#define FW_OPEN_IF 0xe7 +#define FW_CLOSE_IF 0x7e + +#define FW_SIGNATURE (0x5a445a44) +#define FW_SECT_SIZE (0x10000) + +typedef struct Primary_Sector{ + uint32_t fi_addr; + uint32_t fi_size; + uint32_t signature; + uint32_t fw_reserved[5]; + uint32_t vsd[56]; + uint32_t branch_to; + uint32_t crc016; +} primary_sector_t; + +static uint32_t old_dir; +static uint32_t old_pol; +static uint32_t old_mod; +static uint32_t old_dat; + + +static NTSTATUS +fw_access_pciconf ( + IN BUS_INTERFACE_STANDARD *p_BusInterface, + IN ULONG op_flag, + IN PVOID p_buffer, + IN ULONG offset, + IN ULONG POINTER_ALIGNMENT length ) +{ + + ULONG bytes; + NTSTATUS status = STATUS_SUCCESS; + + PAGED_CODE(); + + if( !p_buffer ) + return STATUS_INVALID_PARAMETER; + + if (p_BusInterface) + { + + bytes = p_BusInterface->SetBusData( + p_BusInterface->Context, + PCI_WHICHSPACE_CONFIG, + (PVOID)&offset, + PCI_CONF_ADDR, + sizeof(ULONG) ); + + if( op_flag == 0 ) + { + if ( bytes ) + bytes = p_BusInterface->GetBusData( + p_BusInterface->Context, + PCI_WHICHSPACE_CONFIG, + p_buffer, + PCI_CONF_DATA, + length ); + if ( !bytes ) + status = STATUS_NOT_SUPPORTED; + } + + else + { + if ( bytes ) + bytes = p_BusInterface->SetBusData( + p_BusInterface->Context, + PCI_WHICHSPACE_CONFIG, + p_buffer, + PCI_CONF_DATA, + length); + + if ( !bytes ) + status = STATUS_NOT_SUPPORTED; + } + } + return status; +} + + +static NTSTATUS +__map_crspace( + IN struct ib_ucontext * p_uctx, + IN PVOID p_buf, + IN ULONG buf_size + ) +{ + NTSTATUS status; + PMDL p_mdl; + PVOID ua, ka; + ULONG sz; + PFDO_DEVICE_DATA p_fdo = p_uctx->device->x.p_fdo; + map_crspace *p_res = (map_crspace *)p_buf; + struct pci_dev *p_pdev = p_fdo->bus_ib_ifc.pdev; + + HCA_ENTER( HCA_DBG_PNP ); + + // sanity checks + if ( buf_size < sizeof *p_res || !p_buf ) { + status = STATUS_INVALID_PARAMETER; + goto err_invalid_params; + } + + // map memory + sz =(ULONG)p_pdev->bar[HCA_BAR_TYPE_HCR].size; + if (!p_pdev->bar[HCA_BAR_TYPE_HCR].virt) { + PHYSICAL_ADDRESS pa; + pa.QuadPart = p_pdev->bar[HCA_BAR_TYPE_HCR].phys; + ka = MmMapIoSpace( pa, sz, MmNonCached ); + if ( ka == NULL) { + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM, + ("No kernel mapping of CR space.\n") ); + status = STATUS_INSUFFICIENT_RESOURCES; + goto err_map_to_kernel; + } + p_pdev->bar[HCA_BAR_TYPE_HCR].virt = ka; + } + ka = p_pdev->bar[HCA_BAR_TYPE_HCR].virt; + + // prepare for mapping to user space + p_mdl = IoAllocateMdl( ka, sz, FALSE,FALSE,NULL); + 
if (p_mdl == NULL) { + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM, + ("IoAllocateMdl failed.\n") ); + status = STATUS_INSUFFICIENT_RESOURCES; + goto err_alloc_mdl; + } + + // fill MDL + MmBuildMdlForNonPagedPool(p_mdl); + + // map the buffer into user space + __try + { + ua = MmMapLockedPagesSpecifyCache( p_mdl, UserMode, MmNonCached, + NULL, FALSE, NormalPagePriority ); + } + __except(EXCEPTION_EXECUTE_HANDLER) + { + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM, + ("MmMapLockedPagesSpecifyCache failed.\n") ); + status = STATUS_INSUFFICIENT_RESOURCES; + goto err_map_to_user; + } + + // fill the results + p_res->va = (uint64_t)(ULONG_PTR)ua; + p_res->size = sz; + + // resource tracking + p_uctx->x.p_mdl = p_mdl; + p_uctx->x.va = ua; + +#if 0 + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM, + ("MTHCA: __map_crspace succeeded with .ka %I64x, size %I64x va %I64x, size %x, pa %I64x \n", + p_pdev->bar[HCA_BAR_TYPE_HCR].virt, p_pdev->bar[HCA_BAR_TYPE_HCR].size, + p_res->va, p_res->size, p_pdev->bar[HCA_BAR_TYPE_HCR].phys )); +#endif + status = STATUS_SUCCESS; + goto out; + +err_map_to_user: + IoFreeMdl( p_mdl ); +err_alloc_mdl: +err_map_to_kernel: +err_invalid_params: +out: + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + + +static void +__unmap_crspace( + IN struct ib_ucontext * p_uctx + ) +{ + HCA_ENTER( HCA_DBG_PNP ); + + if (p_uctx->x.va && p_uctx->x.p_mdl) { + MmUnmapLockedPages(p_uctx->x.va, p_uctx->x.p_mdl); + IoFreeMdl( p_uctx->x.p_mdl ); + p_uctx->x.va = p_uctx->x.p_mdl = NULL; + //NB: the unmap of IO space is being done in __UnmapHcaMemoryResources + } + + HCA_EXIT( HCA_DBG_PNP ); +} + +static void +__open_fw_access( + IN struct ib_ucontext* p_uctx, + IN PBUS_INTERFACE_STANDARD p_bus_interface ) +{ + if( !p_uctx->x.fw_if_open ) + { + p_bus_interface->InterfaceReference( p_bus_interface->Context ); + p_uctx->x.fw_if_open = TRUE; + } +} + +static void +__close_fw_access( + IN struct ib_ucontext * p_uctx, + IN PBUS_INTERFACE_STANDARD p_bus_interface + ) +{ + if (p_uctx->x.fw_if_open ) { + p_bus_interface->InterfaceDereference((PVOID)p_bus_interface->Context); + p_uctx->x.fw_if_open = FALSE; + } +} + + +void +unmap_crspace_for_all( struct ib_ucontext *p_uctx ) +{ + PFDO_DEVICE_DATA p_fdo = p_uctx->device->x.p_fdo; + PBUS_INTERFACE_STANDARD p_bus_interface = &p_fdo->bus_pci_ifc; + + HCA_ENTER( HCA_DBG_PNP ); + + mutex_lock( &p_uctx->x.mutex ); + __unmap_crspace( p_uctx); + __close_fw_access(p_uctx, p_bus_interface); + mutex_unlock( &p_uctx->x.mutex ); + + HCA_EXIT( HCA_DBG_PNP ); +} + +static NTSTATUS +fw_flash_write_data ( + IN BUS_INTERFACE_STANDARD *p_BusInterface, + IN PVOID p_buffer, + IN ULONG offset, + IN ULONG POINTER_ALIGNMENT length ) +{ + NTSTATUS status; + uint32_t cnt = 0; + uint32_t lcl_data; + + if (!length) + return IB_INVALID_PARAMETER; + + lcl_data = (*((uint32_t*)p_buffer) << 24); + + status = fw_access_pciconf(p_BusInterface, FW_WRITE , &lcl_data, FLASH_OFFSET+4, length ); + if ( status != STATUS_SUCCESS ) + return status; + lcl_data = ( WRITE_BIT | (offset & ADDR_MSK)); + + status = fw_access_pciconf(p_BusInterface, FW_WRITE , &lcl_data, FLASH_OFFSET, 4 ); + if ( status != STATUS_SUCCESS ) + return status; + + lcl_data = 0; + + do + { + if (++cnt > 5000) + { + return STATUS_DEVICE_NOT_READY; + } + + status = fw_access_pciconf(p_BusInterface, FW_READ , &lcl_data, FLASH_OFFSET, 4 ); + if ( status != STATUS_SUCCESS ) + return status; + + } while(lcl_data & CMD_MASK); + + return status; +} + + +static NTSTATUS +fw_flash_read_data ( + IN BUS_INTERFACE_STANDARD 
*p_BusInterface, + IN PVOID p_buffer, + IN ULONG offset, + IN ULONG POINTER_ALIGNMENT length ) +{ + NTSTATUS status = STATUS_SUCCESS; + uint32_t cnt = 0; + uint32_t lcl_data = ( READ_BIT | (offset & ADDR_MSK)); + + if (!length) + return IB_INVALID_PARAMETER; + + status = fw_access_pciconf(p_BusInterface, FW_WRITE, &lcl_data, FLASH_OFFSET, 4 ); + if ( status != STATUS_SUCCESS ) + return status; + + lcl_data = 0; + do + { + // Timeout checks + if (++cnt > 5000 ) + { + return STATUS_DEVICE_NOT_READY; + } + + status = fw_access_pciconf(p_BusInterface, FW_READ, &lcl_data, FLASH_OFFSET, 4 ); + + if ( status != STATUS_SUCCESS ) + return status; + + } while(lcl_data & CMD_MASK); + + status = fw_access_pciconf(p_BusInterface, FW_READ, p_buffer, FLASH_OFFSET+4, length ); + return status; +} + +ib_api_status_t +fw_access_ctrl( + IN const ib_ca_handle_t h_ca, + IN const void* __ptr64* const handle_array OPTIONAL, + IN uint32_t num_handles, + IN ib_ci_op_t* const p_ci_op, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + NTSTATUS status = STATUS_SUCCESS; + PVOID p_data; + ULONG offset; + ULONG POINTER_ALIGNMENT length; + struct ib_ucontext *p_uctx = (struct ib_ucontext *)h_ca; + PFDO_DEVICE_DATA p_fdo = p_uctx->device->x.p_fdo; + PBUS_INTERFACE_STANDARD p_bus_interface = &p_fdo->bus_pci_ifc; + + UNREFERENCED_PARAMETER(handle_array); + UNREFERENCED_PARAMETER(num_handles); + + if( !p_umv_buf ) + return IB_UNSUPPORTED; + + if ( !p_ci_op ) + return IB_INVALID_PARAMETER; + + length = p_ci_op->buf_size; + offset = p_ci_op->buf_info; + p_data = p_ci_op->p_buf; + + mutex_lock( &p_uctx->x.mutex ); + + switch ( p_ci_op->command ) + { + case FW_MAP_CRSPACE: + status = __map_crspace(p_uctx, p_data, length); + break; + + case FW_UNMAP_CRSPACE: + __unmap_crspace(p_uctx); + break; + + case FW_OPEN_IF: // open BusInterface + if (p_fdo->bus_pci_ifc_taken) + __open_fw_access( p_uctx, p_bus_interface ); + break; + + case FW_READ: // read data from flash + if ( p_uctx->x.fw_if_open ) + status = fw_flash_read_data(p_bus_interface, p_data, offset, length); + break; + + case FW_WRITE: // write data to flash + if ( p_uctx->x.fw_if_open ) + status = fw_flash_write_data(p_bus_interface, p_data, offset, length); + break; + + case FW_READ_CMD: + if ( p_uctx->x.fw_if_open ) + status = fw_access_pciconf(p_bus_interface, 0 , p_data, offset, 4); + break; + + case FW_WRITE_CMD: + if ( p_uctx->x.fw_if_open ) + status = fw_access_pciconf(p_bus_interface, 1 , p_data, offset, 4); + break; + + case FW_CLOSE_IF: // close BusInterface + __close_fw_access(p_uctx, p_bus_interface); + break; + + default: + status = STATUS_INVALID_DEVICE_REQUEST; + } + + if ( status != STATUS_SUCCESS ) { + __close_fw_access(p_uctx, p_bus_interface); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_INIT, + ("fw_access_ctrl failed, ntstatus: %08x.\n", status)); + } + + mutex_unlock( &p_uctx->x.mutex ); + + switch( status ) { + case STATUS_SUCCESS: return IB_SUCCESS; + case STATUS_INVALID_DEVICE_REQUEST: return IB_UNSUPPORTED; + case STATUS_INSUFFICIENT_RESOURCES: return IB_INSUFFICIENT_RESOURCES; + default: return IB_ERROR; + } +} + diff --git a/branches/ConnectX/hw/mlx4/kernel/hca/hca.mof b/branches/ConnectX/hw/mlx4/kernel/hca/hca.mof new file mode 100644 index 00000000..665df469 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/hca/hca.mof @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. 
+ * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: al.c 1611 2006-08-20 14:48:55Z sleybo $ + */ + +#PRAGMA AUTORECOVER + +[Dynamic, Provider("WMIProv"), + WMI, + Description("Mlx4 Hca driver information"), + guid("{2C4C8445-E4A6-45bc-889B-E5E93551DDAF}"), + locale("MS\\0x409")] +class Mlx4HcaInformation +{ + [key, read] + string InstanceName; + [read] boolean Active; + + [WmiDataId(1), + read, + Description("The ErrorCount property indicates the error count of MLX4_HCA device.")] + uint32 ErrorCount; + + [WmiDataId(2), + read, + write, + Description("The DebugPrintLevel property indicates the debug output level of MLX4_HCA device.")] + uint32 DebugPrintLevel; + +}; + + diff --git a/branches/ConnectX/hw/mlx4/kernel/hca/hca.rc b/branches/ConnectX/hw/mlx4/kernel/hca/hca.rc new file mode 100644 index 00000000..80d226e6 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/hca/hca.rc @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ * + * $Id: hca.rc 1611 2006-08-20 14:48:55Z sleybo $ + */ + + +#include + +#define VER_FILETYPE VFT_DRV +#define VER_FILESUBTYPE VFT2_UNKNOWN +#ifdef DBG +#define VER_FILEDESCRIPTION_STR "MLX4 HCA Driver (checked)" +#else +#define VER_FILEDESCRIPTION_STR "MLX4 HCA Driver" +#endif +#define VER_INTERNALNAME_STR "mlx4_hca.sys" +#define VER_ORIGINALFILENAME_STR "mlx4_hca.sys" +#include diff --git a/branches/ConnectX/hw/mlx4/kernel/hca/makefile.inc b/branches/ConnectX/hw/mlx4/kernel/hca/makefile.inc new file mode 100644 index 00000000..b2b1f9b5 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/hca/makefile.inc @@ -0,0 +1,10 @@ +mofcomp: mlx4_hca.bmf + +mlx4_hca.bmf: hca.mof + mofcomp -B:$(OBJ_PATH)\$O\mlx4_hca.bmf hca.mof + wmimofck $(OBJ_PATH)\$O\mlx4_hca.bmf + + + + + diff --git a/branches/ConnectX/hw/mlx4/kernel/hca/mcast.c b/branches/ConnectX/hw/mlx4/kernel/hca/mcast.c new file mode 100644 index 00000000..49c70b95 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/hca/mcast.c @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 2004-2005 Mellanox Technologies, Inc. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: hca_mcast.c 1936 2007-02-06 16:04:33Z sleybo $ + */ + + +#include "precomp.h" + +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mcast.tmh" +#endif + +/* +* Multicast Support Verbs. 
+*/ +ib_api_status_t +mlnx_attach_mcast ( + IN const ib_qp_handle_t h_qp, + IN const ib_gid_t *p_mcast_gid, + IN const uint16_t mcast_lid, + OUT ib_mcast_handle_t *ph_mcast, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + int err; + ib_api_status_t status; + struct ib_qp *p_ib_qp = (struct ib_qp *)h_qp; + mlnx_mcast_t *p_mcast; + + HCA_ENTER(HCA_DBG_MCAST); + + // sanity checks + if( p_umv_buf && p_umv_buf->command ) { + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_MCAST, + ("User mode is not supported yet\n")); + status = IB_UNSUPPORTED; + goto err_nosys; + } + + if( !cl_is_blockable() ) { + status = IB_UNSUPPORTED; + goto err_nosys; + } + + if (!p_mcast_gid || !ph_mcast) { + status = IB_INVALID_PARAMETER; + goto err_invalid_param; + } + + if (p_mcast_gid->raw[0] != 0xff || p_ib_qp->qp_type != IB_QPT_UD) { + status = IB_INVALID_PARAMETER; + goto err_invalid_param; + } + + // allocate structure + p_mcast = (mlnx_mcast_t*)kmalloc(sizeof *p_mcast, GFP_ATOMIC ); + if (p_mcast == NULL) { + status = IB_INSUFFICIENT_MEMORY; + goto err_no_mem; + } + + // attach to mcast group + err = p_ib_qp->device->attach_mcast(p_ib_qp, + (union ib_gid *)p_mcast_gid, mcast_lid); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_MCAST, + ("ibv_attach_mcast failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_attach; + } + + // fill the structure + p_mcast->p_ib_qp = p_ib_qp; + p_mcast->mcast_lid = mcast_lid; + RtlCopyMemory(p_mcast->mcast_gid.raw, p_mcast_gid->raw, sizeof *p_mcast_gid); + HCA_PRINT(TRACE_LEVEL_INFORMATION,HCA_DBG_MCAST, + ("mcasth %p, qp_p %p, mlid %hx, mgid %I64x`%I64x\n", + p_mcast, p_mcast->p_ib_qp, p_mcast->mcast_lid, + cl_ntoh64(*(uint64_t*)&p_mcast->mcast_gid.raw[0]), + cl_ntoh64(*(uint64_t*)&p_mcast->mcast_gid.raw[8] ))); + + // return the result + if (ph_mcast) *ph_mcast = (ib_mcast_handle_t)p_mcast; + + status = IB_SUCCESS; + goto end; + +err_attach: + kfree(p_mcast); +err_no_mem: +err_invalid_param: +end: +err_nosys: + if (p_umv_buf && p_umv_buf->command) + p_umv_buf->status = status; + if (status != IB_SUCCESS) + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_MCAST, + ("completes with ERROR status %x\n", status)); + HCA_EXIT(HCA_DBG_MCAST); + return status; +} + +ib_api_status_t +mlnx_detach_mcast ( + IN const ib_mcast_handle_t h_mcast) +{ + int err; + ib_api_status_t status; + mlnx_mcast_t *p_mcast = (mlnx_mcast_t*)h_mcast; + + + HCA_ENTER(HCA_DBG_MCAST); + + // sanity checks + if (!p_mcast || !p_mcast->p_ib_qp) { + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_MCAST, + ("completes with ERROR status IB_INVALID_PARAMETER\n")); + status = IB_INVALID_PARAMETER; + goto err_invalid_param; + } + + if( !cl_is_blockable() ) { + status = IB_UNSUPPORTED; + goto err_unsupported; + } + + if (p_mcast->mcast_gid.raw[0] != 0xff || p_mcast->p_ib_qp->qp_type != IB_QPT_UD) { + status = IB_INVALID_PARAMETER; + goto err_invalid_param; + } + + HCA_PRINT(TRACE_LEVEL_INFORMATION,HCA_DBG_MCAST, + ("mcasth %p, qp_p %p, mlid %hx, mgid %I64x`%I64x\n", + p_mcast, p_mcast->p_ib_qp, p_mcast->mcast_lid, + *(uint64_t*)&p_mcast->mcast_gid.raw[0], + *(uint64_t*)&p_mcast->mcast_gid.raw[8] )); + + // detach + err = p_mcast->p_ib_qp->device->detach_mcast(p_mcast->p_ib_qp, + (union ib_gid *)&p_mcast->mcast_gid, p_mcast->mcast_lid); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_MCAST, + ("ibv_detach_mcast failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_detach_mcast; + } + + kfree(p_mcast); + status = IB_SUCCESS; + +err_detach_mcast: +err_unsupported: +err_invalid_param: + if (status != IB_SUCCESS) + 
HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_MCAST, + ("completes with ERROR status %d\n", status)); + HCA_EXIT(HCA_DBG_MCAST); + return status; +} + + +void +mlnx_mcast_if( + IN OUT ci_interface_t *p_interface ) +{ + p_interface->attach_mcast = mlnx_attach_mcast; + p_interface->detach_mcast = mlnx_detach_mcast; +} diff --git a/branches/ConnectX/hw/mlx4/kernel/hca/mlx4_hca.cdf b/branches/ConnectX/hw/mlx4/kernel/hca/mlx4_hca.cdf new file mode 100644 index 00000000..add4b791 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/hca/mlx4_hca.cdf @@ -0,0 +1,10 @@ +[CatalogHeader] +Name=mlx4_hca.cat +PublicVersion=0x0000001 +EncodingType=0x00010001 +CATATTR1=0x10010001:OSAttr:2:6.0 +[CatalogFiles] +mlx4_bus.inf=mlx4_hca.inf +mlx4_bus.sys=mlx4_hca.sys +WdfCoInstaller01005.dll=WdfCoInstaller01005.dll + diff --git a/branches/ConnectX/hw/mlx4/kernel/hca/mlx4_hca.inf b/branches/ConnectX/hw/mlx4/kernel/hca/mlx4_hca.inf new file mode 100644 index 00000000..2833698f --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/hca/mlx4_hca.inf @@ -0,0 +1,217 @@ +; Mellanox Technologies InfiniBand HCAs. +; Copyright 2008 Mellanox Technologies all Rights Reserved. + +[Version] +Signature="$WINDOWS NT$" +Class=Mlx4Hca +ClassGUID={31B0B28A-26FF-4dca-A6FA-E767C7DFBA20} +Provider=%MTL% +; must be synchronized with hca\drv.c +DriverVer=02/01/2008,1.0.0.0 +CatalogFile=mlx4_hca.cat + + +;***************************************** +; Destination directory section +;***************************************** + +[DestinationDirs] +DefaultDestDir = 12 +Wdf_CoInstaller_CopyFiles = 11 + + +;***************************************** +; Class Install section +;***************************************** + +[ClassInstall32] +AddReg=ClassAddReg +CopyFiles=ClassCopyFiles + +[ClassCopyFiles] +IbInstaller.dll + +[ClassAddReg] +HKR,,,,"Mellanox ConnectX Virtual InfiniBand Adapters" +HKR,,Icon,,-5 +HKR,,SilentInstall,,1 +HKLM,"System\CurrentControlSet\Control\CoDeviceInstallers", \ + %Mlx4HcaClassGuid%,%REG_MULTI_SZ_APPEND%, "IbInstaller.dll,IbCoInstaller" + +;***************************************** +; Device Install section +;***************************************** + +[SourceDisksNames.x86] +1=%DiskId%,,,"" + +[SourceDisksNames.amd64] +1=%DiskId%,,,"" + +[SourceDisksNames.ia64] +1=%DiskId%,,,"" + +[SourceDisksFiles.x86] +IbInstaller.dll=1 +mlx4_hca.sys = 1,, +wdfcoinstaller01005.dll = 1,, +;mlx4u.dll=1 +;mlx4ud.dll=1 + +[SourceDisksFiles.amd64] +IbInstaller.dll=1 +mlx4_hca.sys = 1,, +wdfcoinstaller01005.dll = 1,, +;mlx4u.dll=1 +;mlx4ud.dll=1 + +[SourceDisksFiles.ia64] +IbInstaller.dll=1 +mlx4_hca.sys = 1,, +wdfcoinstaller01005.dll = 1,, +;mlx4u.dll=1 +;mlx4ud.dll=1 + +;***************************************** +; Mlx4Bus Install Section +;***************************************** + +[Manufacturer] +%MTL% = MLX4HCA.DeviceSection,ntx86,ntamd64,ntia64 + +[MLX4HCA.DeviceSection] +; empty since we don't support W9x/Me + +[MLX4HCA.DeviceSection.ntx86] +%Mlx4_Hca.DeviceDesc%=MLX4HCA.DDInstall, MLX4\ConnectX_Hca + +[MLX4HCA.DeviceSection.ntamd64] +%Mlx4_Hca.DeviceDesc%=MLX4HCA.DDInstall, MLX4\ConnectX_Hca + +[MLX4HCA.DeviceSection.ntia64] +%Mlx4_Hca.DeviceDesc%=MLX4HCA.DDInstall, MLX4\ConnectX_Hca + +[MLX4HCA.DDInstall.ntx86] +CopyFiles = MLX4HCA.CopyFiles +CopyFiles = MLX4HCA.UMCopyFiles +CopyFiles = MLX4HCA.WOW64CopyFiles +CopyINF=ib_bus.inf + +[MLX4HCA.DDInstall.ntamd64] +CopyFiles = MLX4HCA.CopyFiles +CopyFiles = MLX4HCA.UMCopyFiles +CopyFiles = MLX4HCA.WOW64CopyFiles +CopyINF=ib_bus.inf + +[MLX4HCA.DDInstall.ntia64] +CopyFiles = 
MLX4HCA.CopyFiles +CopyFiles = MLX4HCA.UMCopyFiles +CopyFiles = MLX4HCA.WOW64CopyFiles +CopyINF=ib_bus.inf + +[MLX4HCA.DDInstall.ntx86.Services] +AddService = mlx4_hca,%SPSVCINST_ASSOCSERVICE%,MLX4HCA.ServiceInstall,MLX4HCA.EventLog + +[MLX4HCA.DDInstall.ntamd64.Services] +AddService = mlx4_hca,%SPSVCINST_ASSOCSERVICE%,MLX4HCA.ServiceInstall,MLX4HCA.EventLog + +[MLX4HCA.DDInstall.ntia64.Services] +AddService = mlx4_hca,%SPSVCINST_ASSOCSERVICE%,MLX4HCA.ServiceInstall,MLX4HCA.EventLog + +[MLX4HCA.CopyFiles] +mlx4_hca.sys + +[MLX4HCA.UMCopyFiles] +;mlx4u.dll,,,2 +;mlx4ud.dll,,,2 + +[MLX4HCA.WOW64CopyFiles] +;mlx4u.dll,mlx4u32.dll,,2 +;mlx4ud.dll,mlx4u32d.dll,,2 + + +;***************************************** +; Service Install section +;***************************************** + +[MLX4HCA.ServiceInstall] +DisplayName = %MLX4HCA.ServiceDesc% +ServiceType = %SERVICE_KERNEL_DRIVER% +StartType = %SERVICE_DEMAND_START% +ErrorControl = %SERVICE_ERROR_NORMAL% +ServiceBinary = %12%\mlx4_hca.sys +LoadOrderGroup = extended base +AddReg = MLX4HCA.ParamsReg + +[MLX4HCA.EventLog] +AddReg = MLX4HCA.AddEventLogReg + +[MLX4HCA.AddEventLogReg] +HKR, , EventMessageFile, 0x00020000, "%%SystemRoot%%\System32\IoLogMsg.dll;%%SystemRoot%%\System32\drivers\mlx4_hca.sys" +HKR, , TypesSupported, 0x00010001, 7 + +[MLX4HCA.ParamsReg] +HKR,,DeviceCharacteristics,0x10001,0x0100 ; Use same security checks on relative opens +HKR,,Security,,"D:P(A;;GA;;;BA)(A;;GA;;;SY)" ; Allow generic-all access to Built-in administrators and Local system +HKR,"Parameters","DebugLevel",%REG_DWORD%,0x00000003 +HKR,"Parameters","DebugFlags",%REG_DWORD%,0x0000ffff +HKLM,"System\CurrentControlSet\Control\WMI\GlobalLogger\F8C96A49-AE22-41e9-8025-D7E416884D89","Flags",%REG_DWORD%,0xffff +HKLM,"System\CurrentControlSet\Control\WMI\GlobalLogger\F8C96A49-AE22-41e9-8025-D7E416884D89","Level",%REG_DWORD%,0x3 + +; +; The below section is temporarily disabled. +; It should be uncommented after returning MLX4_HCA to WDF model. +; + +;***************************************** +; WDF Coinstaller installation section +;***************************************** + +;[MLX4HCA.DDInstall.ntx86.CoInstallers] +;AddReg=Wdf_CoInstaller_AddReg +;CopyFiles=Wdf_CoInstaller_CopyFiles + +;[MLX4HCA.DDInstall.ntamd64.CoInstallers] +;AddReg=Wdf_CoInstaller_AddReg +;CopyFiles=Wdf_CoInstaller_CopyFiles + +;[MLX4HCA.DDInstall.ntia64.CoInstallers] +;AddReg=Wdf_CoInstaller_AddReg +;CopyFiles=Wdf_CoInstaller_CopyFiles + +;[Wdf_CoInstaller_AddReg] +;HKR,,CoInstallers32,0x00010000, "wdfcoinstaller01005.dll,WdfCoInstaller" + +;[Wdf_CoInstaller_CopyFiles] +;wdfcoinstaller01005.dll + +;[MLX4HCA.DDInstall.ntx86.Wdf] +;KmdfService = mlx4_hca, mlx4_hca_wdfsect + +;[MLX4HCA.DDInstall.ntamd64.Wdf] +;KmdfService = mlx4_hca, mlx4_hca_wdfsect + +;[MLX4HCA.DDInstall.ntia64.Wdf] +;KmdfService = mlx4_hca, mlx4_hca_wdfsect + +;[mlx4_hca_wdfsect] +;KmdfLibraryVersion = 1.5 + + +;***************************************** +; Strings +;***************************************** + +[Strings] +Mlx4HcaClassGuid = "{31B0B28A-26FF-4dca-A6FA-E767C7DFBA20}" +MTL="Mellanox Technologies Ltd." 
+MLX4HCA.ServiceDesc = "Mellanox ConnectX Virtual InfiniBand Driver" +Mlx4_Hca.DeviceDesc="Mellanox ConnectX Virtual Channel Adapter" +DiskId = "Mellanox Mlx4 Bus installation disk" +SPSVCINST_NULL = 0x0 +SPSVCINST_ASSOCSERVICE = 0x00000002 +SERVICE_KERNEL_DRIVER = 1 +SERVICE_DEMAND_START = 3 +SERVICE_ERROR_NORMAL = 1 +REG_DWORD = 0x00010001 +REG_MULTI_SZ_APPEND = 0x00010008 diff --git a/branches/ConnectX/hw/mlx4/kernel/hca/mlx4_hca32.cdf b/branches/ConnectX/hw/mlx4/kernel/hca/mlx4_hca32.cdf new file mode 100644 index 00000000..add4b791 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/hca/mlx4_hca32.cdf @@ -0,0 +1,10 @@ +[CatalogHeader] +Name=mlx4_hca.cat +PublicVersion=0x0000001 +EncodingType=0x00010001 +CATATTR1=0x10010001:OSAttr:2:6.0 +[CatalogFiles] +mlx4_bus.inf=mlx4_hca.inf +mlx4_bus.sys=mlx4_hca.sys +WdfCoInstaller01005.dll=WdfCoInstaller01005.dll + diff --git a/branches/ConnectX/hw/mlx4/kernel/hca/mr.c b/branches/ConnectX/hw/mlx4/kernel/hca/mr.c new file mode 100644 index 00000000..ba635881 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/hca/mr.c @@ -0,0 +1,587 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 2004-2005 Mellanox Technologies, Inc. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: hca_memory.c 2028 2007-07-12 16:00:03Z leonid $ + */ + +#include "precomp.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mr.tmh" +#endif + +/* + * Memory Management Verbs. + */ + +ib_api_status_t +mlnx_register_mr ( + IN const ib_pd_handle_t h_pd, + IN const ib_mr_create_t *p_mr_create, + OUT net32_t* const p_lkey, + OUT net32_t* const p_rkey, + OUT ib_mr_handle_t *ph_mr, + IN boolean_t um_call ) +{ + ib_api_status_t status; + int err; + struct ib_mr *p_ib_mr; + struct ib_pd *p_ib_pd = (struct ib_pd *)h_pd; + ci_umv_buf_t umv_buf = { 0, 0, 0, 0, NULL }; + + HCA_ENTER(HCA_DBG_MEMORY); + + // sanity checks + if( !cl_is_blockable() ) { + status = IB_UNSUPPORTED; + goto err_unsupported; + } + if (!p_mr_create || 0 == p_mr_create->length) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_MEMORY, + ("invalid attributes\n")); + status = IB_INVALID_PARAMETER; + goto err_invalid_parm; + } + /* + * Local write permission is required if remote write or + * remote atomic permission is also requested.
+ */ + if (p_mr_create->access_ctrl & (IB_AC_RDMA_WRITE | IB_AC_ATOMIC) && + !(p_mr_create->access_ctrl & IB_AC_LOCAL_WRITE)) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_MEMORY, + ("invalid access rights\n")); + status = IB_INVALID_PERMISSION; + goto err_invalid_access; + } + + // register mr + p_ib_mr = ibv_reg_mr(p_ib_pd, (u64)(ULONG_PTR)p_mr_create->vaddr, + p_mr_create->length, (uint64_t)p_mr_create->vaddr, + to_qp_acl(p_mr_create->access_ctrl), um_call ? &umv_buf : NULL ); + if (IS_ERR(p_ib_mr)) { + err = PTR_ERR(p_ib_mr); + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_MEMORY, + ("ibv_reg_mr failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_reg_mr; + } + + // results + *p_lkey = p_ib_mr->lkey; + *p_rkey = cl_hton32( p_ib_mr->rkey ); + if (ph_mr) *ph_mr = (ib_mr_handle_t)p_ib_mr; + status = IB_SUCCESS; + +err_reg_mr: +err_invalid_access: +err_invalid_parm: +err_unsupported: + if (status != IB_SUCCESS) + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY, + ("completes with ERROR status %x\n", status)); + HCA_EXIT(HCA_DBG_MEMORY); + return status; +} + +ib_api_status_t +mlnx_register_pmr ( + IN const ib_pd_handle_t h_pd, + IN const ib_phys_create_t* const p_pmr_create, + IN OUT uint64_t* const p_vaddr, + OUT net32_t* const p_lkey, + OUT net32_t* const p_rkey, + OUT ib_mr_handle_t* const ph_mr, + IN boolean_t um_call ) +{ + ib_api_status_t status; + int err; + struct ib_mr *p_ib_mr; + struct ib_phys_buf *buffer_list; + struct ib_pd *p_ib_pd = (struct ib_pd *)h_pd; + + UNUSED_PARAM( um_call ); + + HCA_ENTER(HCA_DBG_MEMORY); + + if (hca_is_livefish(p_ib_pd->device->x.p_fdo)) { + p_ib_mr = kzalloc(sizeof *p_ib_mr, GFP_KERNEL); + if (!p_ib_mr) { + status = IB_INSUFFICIENT_MEMORY; + goto err_mem; + } + p_ib_mr->device = p_ib_pd->device; + p_ib_mr->pd = p_ib_pd; + goto done; + } + + // sanity checks + if( !cl_is_blockable() ) { + status = IB_UNSUPPORTED; + goto err_unsupported; + } + if (!p_vaddr || !p_pmr_create || + 0 == p_pmr_create->length ) { + status = IB_INVALID_PARAMETER; + goto err_invalid_parm; + } + + // prepare parameters + buffer_list = (void*)p_pmr_create->range_array; + //NB: p_pmr_create->buf_offset is not used, i.e. supposed that region is page-aligned + //NB: p_pmr_create->hca_page_size is not used, i.e. supposed it is always the same + + // register pmr + if (p_pmr_create->length == (uint64_t)-1i64) + { + p_ib_mr = ib_get_dma_mr( p_ib_pd, + to_qp_acl(p_pmr_create->access_ctrl) ); + } + else + p_ib_mr = ib_reg_phys_mr(p_ib_pd, buffer_list, p_pmr_create->num_ranges, + to_qp_acl(p_pmr_create->access_ctrl), p_vaddr ); + if (IS_ERR(p_ib_mr)) { + err = PTR_ERR(p_ib_mr); + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_MEMORY, + ("mthca_reg_phys_mr failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_reg_phys_mr; + } + + // results +done: + if (ph_mr) *ph_mr = (ib_mr_handle_t)p_ib_mr; + *p_lkey = p_ib_mr->lkey; + *p_rkey = cl_hton32( p_ib_mr->rkey ); + //NB: p_vaddr was not changed + status = IB_SUCCESS; + +err_reg_phys_mr: +err_invalid_parm: +err_unsupported: +err_mem: + if (status != IB_SUCCESS) + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY, + ("completes with ERROR status %x\n", status)); + + HCA_EXIT(HCA_DBG_MEMORY); + return status; + +} + +ib_api_status_t +mlnx_query_mr ( + IN const ib_mr_handle_t h_mr, + OUT ib_mr_attr_t *p_mr_query ) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct ib_mr *p_ib_mr = (struct ib_mr *)h_mr; + struct ib_mr_attr mr_attr; + UNREFERENCED_PARAMETER(p_mr_query); + + HCA_ENTER(HCA_DBG_MEMORY); + + err = p_ib_mr->device->query_mr ? 
+ p_ib_mr->device->query_mr(p_ib_mr, &mr_attr) : -ENOSYS; + status = errno_to_iberr(err); + + if (err) { + // TODO: convert struct ib_mr_attr to ib_mr_attr_t + } + + if (status != IB_SUCCESS) + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_CQ, + ("ib_query_mr failed with status %x\n", status)); + + HCA_EXIT(HCA_DBG_MEMORY); + return status; +} + + +ib_api_status_t +mlnx_modify_mr ( + IN const ib_mr_handle_t h_mr, + IN const ib_mr_mod_t mem_modify_req, + IN const ib_mr_create_t *p_mr_create, + OUT uint32_t *p_lkey, + OUT uint32_t *p_rkey, + IN const ib_pd_handle_t h_pd OPTIONAL, + IN boolean_t um_call ) +{ + UNREFERENCED_PARAMETER(h_mr); + UNREFERENCED_PARAMETER(mem_modify_req); + UNREFERENCED_PARAMETER(p_mr_create); + UNREFERENCED_PARAMETER(p_lkey); + UNREFERENCED_PARAMETER(p_rkey); + UNREFERENCED_PARAMETER(h_pd); + UNREFERENCED_PARAMETER(um_call); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_MEMORY ,("mlnx_modify_mr not implemented\n")); + return IB_UNSUPPORTED; +} + + +ib_api_status_t +mlnx_modify_pmr ( + IN const ib_mr_handle_t h_mr, + IN const ib_mr_mod_t mem_modify_req, + IN const ib_phys_create_t* const p_pmr_create, + IN OUT uint64_t* const p_vaddr, + OUT uint32_t* const p_lkey, + OUT uint32_t* const p_rkey, + IN const ib_pd_handle_t h_pd OPTIONAL, + IN boolean_t um_call ) +{ + UNREFERENCED_PARAMETER(h_mr); + UNREFERENCED_PARAMETER(mem_modify_req); + UNREFERENCED_PARAMETER(p_pmr_create); + UNREFERENCED_PARAMETER(p_vaddr); + UNREFERENCED_PARAMETER(p_lkey); + UNREFERENCED_PARAMETER(p_rkey); + UNREFERENCED_PARAMETER(h_pd); + UNREFERENCED_PARAMETER(um_call); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_MEMORY ,("mlnx_modify_pmr not implemented\n")); + return IB_UNSUPPORTED; +} + +ib_api_status_t +mlnx_register_smr ( + IN const ib_mr_handle_t h_mr, + IN const ib_pd_handle_t h_pd, + IN const ib_access_t access_ctrl, + IN OUT uint64_t* const p_vaddr, + OUT net32_t* const p_lkey, + OUT net32_t* const p_rkey, + OUT ib_mr_handle_t* const ph_mr, + IN boolean_t um_call ) +{ + UNREFERENCED_PARAMETER(h_mr); + UNREFERENCED_PARAMETER(h_pd); + UNREFERENCED_PARAMETER(access_ctrl); + UNREFERENCED_PARAMETER(p_vaddr); + UNREFERENCED_PARAMETER(p_lkey); + UNREFERENCED_PARAMETER(p_rkey); + UNREFERENCED_PARAMETER(ph_mr); + UNREFERENCED_PARAMETER(um_call); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_MEMORY ,("mlnx_register_smr not implemented\n")); + return IB_UNSUPPORTED; +} + +ib_api_status_t +mlnx_deregister_mr ( + IN const ib_mr_handle_t h_mr) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct ib_mr *p_ib_mr = (struct ib_mr *)h_mr; + + HCA_ENTER(HCA_DBG_SHIM); + + if (hca_is_livefish(p_ib_mr->device->x.p_fdo)) { + kfree(p_ib_mr); + goto done; + } + + // sanity checks + if( !cl_is_blockable() ) { + status = IB_UNSUPPORTED; + goto err_unsupported; + } + + // deregister + err = ib_dereg_mr(p_ib_mr); + status = errno_to_iberr(err); + +err_unsupported: + if (status != IB_SUCCESS) + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY, + ("ib_dereg_mr failed with status %x\n", status)); + +done: + HCA_EXIT(HCA_DBG_MEMORY); + return status; +} + +ib_api_status_t +mlnx_alloc_fmr( + IN const ib_pd_handle_t h_pd, + IN const mlnx_fmr_create_t* const p_fmr_create, + OUT mlnx_fmr_handle_t* const ph_fmr + ) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct ib_fmr * p_ib_fmr; + struct ib_pd *p_ib_pd = (struct ib_pd *)h_pd; + struct ib_fmr_attr fmr_attr; + + HCA_ENTER(HCA_DBG_MEMORY); + + // sanity checks + if( !cl_is_blockable() ) { + status = IB_UNSUPPORTED; + goto err_unsupported; + } + if (!p_fmr_create ) { + status = 
IB_INVALID_PARAMETER; + goto err_invalid_parm; + } + // TODO: check Max remap in AL + + // prepare parameters + RtlZeroMemory(&fmr_attr, sizeof(struct ib_fmr_attr)); + fmr_attr.max_maps = p_fmr_create->max_maps; + fmr_attr.max_pages = p_fmr_create->max_pages; + fmr_attr.page_shift = p_fmr_create->page_size; + + // register mr + p_ib_fmr = p_ib_pd->device->alloc_fmr(p_ib_pd, + to_qp_acl(p_fmr_create->access_ctrl), &fmr_attr); + if (IS_ERR(p_ib_fmr)) { + err = PTR_ERR(p_ib_fmr); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_MEMORY , + ("ib_alloc_fmr failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_alloc_fmr; + } + else { + p_ib_fmr->device = p_ib_pd->device; + p_ib_fmr->pd = p_ib_pd; + atomic_inc(&p_ib_pd->usecnt); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_MEMORY ,("PD%d use cnt %d \n", + ((struct mlx4_ib_pd*)p_ib_pd)->pdn, p_ib_pd->usecnt)); + } + + // results + if (ph_fmr) + *ph_fmr = (mlnx_fmr_handle_t)p_ib_fmr; + +err_alloc_fmr: +err_invalid_parm: +err_unsupported: + if (status != IB_SUCCESS) + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY, + ("completes with ERROR status %x\n", status)); + + HCA_EXIT(HCA_DBG_MEMORY); + return status; + +} + +ib_api_status_t +mlnx_map_phys_fmr ( + IN const mlnx_fmr_handle_t h_fmr, + IN const uint64_t* const page_list, + IN const int list_len, + IN OUT uint64_t* const p_vaddr, + OUT net32_t* const p_lkey, + OUT net32_t* const p_rkey + ) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct ib_fmr *p_ib_fmr = (struct ib_fmr *)h_fmr; + uint64_t vaddr = (*p_vaddr) & ~(PAGE_SIZE - 1); + + HCA_ENTER(HCA_DBG_MEMORY); + + // mapping + err = ib_map_phys_fmr(p_ib_fmr, (u64*)page_list, list_len, (uint64_t)(ULONG_PTR)vaddr); + status = errno_to_iberr(err); + + // return the results + *p_vaddr = vaddr; + *p_lkey = p_ib_fmr->lkey; + *p_rkey = cl_hton32( p_ib_fmr->rkey ); + + if (status != IB_SUCCESS) + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY, + ("ibv_map_phys_fmr failed with status %x\n", status)); + + HCA_EXIT(HCA_DBG_MEMORY); + return status; +} + + + +ib_api_status_t +mlnx_unmap_fmr ( + IN const mlnx_fmr_handle_t *ph_fmr) +{ + int err; + struct list_head fmr_list; + ib_api_status_t status = IB_SUCCESS; + struct ib_fmr *p_ib_fmr = (struct ib_fmr *)*ph_fmr; + + HCA_ENTER(HCA_DBG_MEMORY); + + // sanity checks + if( !cl_is_blockable() ) { + status = IB_UNSUPPORTED; + goto err_unsupported; + } + + INIT_LIST_HEAD(&fmr_list); + while(*ph_fmr) { + list_add_tail(&p_ib_fmr->list, &fmr_list); + p_ib_fmr = (struct ib_fmr *)*++ph_fmr; + } + + if (list_empty(&fmr_list)) + goto done; + + p_ib_fmr = list_entry(fmr_list.next, struct ib_fmr, list); + err = p_ib_fmr->device->unmap_fmr(&fmr_list); + status = errno_to_iberr(err); + +err_unsupported: + if (status != IB_SUCCESS) + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY, + ("ibv_unmap_fmr failed with status %x\n", status)); + +done: + HCA_EXIT(HCA_DBG_MEMORY); + return status; + + +} + + +ib_api_status_t +mlnx_dealloc_fmr ( + IN const mlnx_fmr_handle_t h_fmr + ) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct ib_fmr *p_ib_fmr = (struct ib_fmr *)h_fmr; + struct ib_pd *pd = p_ib_fmr->pd; + + HCA_ENTER(HCA_DBG_MEMORY); + + // sanity checks + if( !cl_is_blockable() ) { + status = IB_UNSUPPORTED; + goto err_unsupported; + } + + // deregister + err = p_ib_fmr->device->dealloc_fmr(p_ib_fmr); + if (!err) + atomic_dec(&pd->usecnt); + status = errno_to_iberr(err); + +err_unsupported: + if (status != IB_SUCCESS) + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY, + ("ibv_dealloc_fmr failed with status %x\n", 
status)); + + HCA_EXIT(HCA_DBG_MEMORY); + return status; + +} + + + +/* +* Memory Window Verbs. +*/ + +ib_api_status_t +mlnx_create_mw ( + IN const ib_pd_handle_t h_pd, + OUT net32_t* const p_rkey, + OUT ib_mw_handle_t *ph_mw, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + UNREFERENCED_PARAMETER(h_pd); + UNREFERENCED_PARAMETER(p_rkey); + UNREFERENCED_PARAMETER(ph_mw); + UNREFERENCED_PARAMETER(p_umv_buf); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_MEMORY ,("mlnx_create_mw not implemented\n")); + return IB_UNSUPPORTED; +} + +ib_api_status_t +mlnx_query_mw ( + IN const ib_mw_handle_t h_mw, + OUT ib_pd_handle_t *ph_pd, + OUT net32_t* const p_rkey, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + UNREFERENCED_PARAMETER(h_mw); + UNREFERENCED_PARAMETER(ph_pd); + UNREFERENCED_PARAMETER(p_rkey); + UNREFERENCED_PARAMETER(p_umv_buf); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_MEMORY ,("mlnx_query_mw not implemented\n")); + return IB_UNSUPPORTED; +} + +ib_api_status_t +mlnx_destroy_mw ( + IN const ib_mw_handle_t h_mw) +{ + UNREFERENCED_PARAMETER(h_mw); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_MEMORY ,("mlnx_destroy_mw not implemented\n")); + return IB_UNSUPPORTED; +} + + +void +mlnx_mr_if( + IN OUT ci_interface_t *p_interface ) +{ + p_interface->register_mr = mlnx_register_mr; + p_interface->register_pmr = mlnx_register_pmr; + p_interface->query_mr = mlnx_query_mr; + p_interface->modify_mr = mlnx_modify_mr; + p_interface->modify_pmr = mlnx_modify_pmr; + p_interface->register_smr = mlnx_register_smr; + p_interface->deregister_mr = mlnx_deregister_mr; + + p_interface->alloc_mlnx_fmr = mlnx_alloc_fmr; + p_interface->map_phys_mlnx_fmr = mlnx_map_phys_fmr; + p_interface->unmap_mlnx_fmr = mlnx_unmap_fmr; + p_interface->dealloc_mlnx_fmr = mlnx_dealloc_fmr; + + p_interface->create_mw = mlnx_create_mw; + p_interface->query_mw = mlnx_query_mw; + p_interface->destroy_mw = mlnx_destroy_mw; +} + +void +mlnx_mr_if_livefish( + IN OUT ci_interface_t *p_interface ) +{ + p_interface->register_pmr = mlnx_register_pmr; + p_interface->deregister_mr = mlnx_deregister_mr; +} + + diff --git a/branches/ConnectX/hw/mlx4/kernel/hca/pd.c b/branches/ConnectX/hw/mlx4/kernel/hca/pd.c new file mode 100644 index 00000000..e5c0a728 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/hca/pd.c @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: al.c 1611 2006-08-20 14:48:55Z sleybo $ + */ + +#include "precomp.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "pd.tmh" +#endif + + +/* Protection domains */ + +ib_api_status_t +mlnx_allocate_pd ( + IN const ib_ca_handle_t h_ca, + IN const ib_pd_type_t type, + OUT ib_pd_handle_t *ph_pd, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + ib_api_status_t status; + struct ib_device *p_ibdev; + struct ib_ucontext *p_uctx; + struct ib_pd *p_ib_pd; + struct ib_udata udata; + struct ibv_alloc_pd_resp *p_resp = NULL; + int err; + + //TODO: how are we to use it ? + UNREFERENCED_PARAMETER(type); + + HCA_ENTER(HCA_DBG_PD); + + if( p_umv_buf ) { + p_uctx = (struct ib_ucontext *)h_ca; + p_ibdev = p_uctx->device; + + // sanity checks + if (p_umv_buf->output_size < sizeof(struct ibv_alloc_pd_resp) || + !p_umv_buf->p_inout_buf) { + status = IB_INVALID_PARAMETER; + goto err_alloc_pd; + } + + // prepare user parameters + p_resp = (struct ibv_alloc_pd_resp*)p_umv_buf->p_inout_buf; + INIT_UDATA(&udata, NULL, &p_resp->pdn, + 0, sizeof(p_resp->pdn)); + } + else { + mlnx_hca_t *p_hca = (mlnx_hca_t *)h_ca; + p_ibdev = hca2ibdev(p_hca); + p_uctx = NULL; + } + + // create PD + p_ib_pd = p_ibdev->alloc_pd(p_ibdev, p_uctx, &udata); + + if (IS_ERR(p_ib_pd)){ + err = PTR_ERR(p_ib_pd); + status = errno_to_iberr(err); + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_PD, + ("ibv_alloc_pd failed (%#x)\n", status)); + goto err_alloc_pd; + } + else { + p_ib_pd->device = p_ibdev; + p_ib_pd->p_uctx = p_uctx; + atomic_set(&p_ib_pd->usecnt, 0); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_PD ,("pdn %d, usecnt %d, pd_handle %p, ctx %p \n", + ((struct mlx4_ib_pd*)p_ib_pd)->pdn, p_ib_pd->usecnt, p_ib_pd, p_ib_pd->p_uctx)); + } + + // complete user response + if (p_umv_buf && p_umv_buf->command) { + p_resp->pd_handle = (u64)(ULONG_PTR)p_ib_pd; + } + + // return the result + if (ph_pd) *ph_pd = (ib_pd_handle_t)p_ib_pd; + + status = IB_SUCCESS; + +err_alloc_pd: + if (p_umv_buf && p_umv_buf->command) + p_umv_buf->status = status; + HCA_EXIT(HCA_DBG_PD); + return status; +} + +ib_api_status_t +mlnx_deallocate_pd ( + IN ib_pd_handle_t h_pd) +{ + ib_api_status_t status; + int err; + struct ib_pd *p_ib_pd = (struct ib_pd *)h_pd; + + HCA_ENTER( HCA_DBG_PD); + + HCA_PRINT(TRACE_LEVEL_INFORMATION,HCA_DBG_PD, + ("pcs %p\n", PsGetCurrentProcess())); + + if (!hca_is_livefish(p_ib_pd->device->x.p_fdo)) { + if (atomic_read(&p_ib_pd->usecnt)) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_PD, + ("resources are not released (pdn %d, cnt %d)\n", + ((struct mlx4_ib_pd*)p_ib_pd)->pdn, p_ib_pd->usecnt)); + status = IB_RESOURCE_BUSY; + goto err_dealloc_pd; + } + } + + err = p_ib_pd->device->dealloc_pd(p_ib_pd); + if (err) { + status = errno_to_iberr(err); + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_PD + ,("ibv_dealloc_pd failed (%#x)\n", status)); + goto err_dealloc_pd; + } + status = IB_SUCCESS; + +err_dealloc_pd: + HCA_EXIT(HCA_DBG_PD); + return status; +} + + +void +mlnx_pd_if( + IN OUT ci_interface_t *p_interface ) +{ + p_interface->allocate_pd = mlnx_allocate_pd; + p_interface->deallocate_pd = mlnx_deallocate_pd; +} + diff --git a/branches/ConnectX/hw/mlx4/kernel/hca/precomp.h b/branches/ConnectX/hw/mlx4/kernel/hca/precomp.h new file mode 100644 index 00000000..bb5b273f --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/hca/precomp.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. 
+ * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: al.c 1611 2006-08-20 14:48:55Z sleybo $ + */ +#pragma once + +#include +#include +#define NTSTRSAFE_LIB +#include +#include // required for GUID definitions +#include "public.h" +#include "debug.h" +#include "l2w.h" +#include "verbs.h" +#include "ib\mlx4_ib.h" +#include "drv.h" +#include "mx_abi.h" +#include "vc.h" +#include "ib_cache.h" +#include "data.h" diff --git a/branches/ConnectX/hw/mlx4/kernel/hca/qp.c b/branches/ConnectX/hw/mlx4/kernel/hca/qp.c new file mode 100644 index 00000000..8c01a08a --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/hca/qp.c @@ -0,0 +1,391 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: al.c 1611 2006-08-20 14:48:55Z sleybo $ + */ + +#include "precomp.h" + +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "qp.tmh" +#endif + + +ib_api_status_t +mlnx_query_qp ( + IN const ib_qp_handle_t h_qp, + OUT ib_qp_attr_t *p_qp_attr, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + ib_api_status_t status; + struct ib_qp *p_ib_qp = (struct ib_qp *)h_qp; + struct ib_qp_attr qp_attr; + struct ib_qp_init_attr qp_init_attr; + int qp_attr_mask = 0; + int err; + + UNREFERENCED_PARAMETER(p_umv_buf); + + HCA_ENTER( HCA_DBG_QP); + + // sanity checks + if (!p_qp_attr) { + status = IB_INVALID_PARAMETER; + goto err_parm; + } + + // convert structures + memset( &qp_attr, 0, sizeof(struct ib_qp_attr) ); + err = p_ib_qp->device->query_qp( p_ib_qp, &qp_attr, + qp_attr_mask, &qp_init_attr); + if (err){ + status = errno_to_iberr(err); + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_PD, + ("ib_query_qp failed (%#x)\n", status)); + goto err_query_qp; + } + + status = from_qp_attr( p_ib_qp, &qp_attr, p_qp_attr ); + +err_query_qp: +err_parm: + HCA_EXIT(HCA_DBG_QP); + return status; +} + +static ib_api_status_t +__create_qp ( + IN const ib_pd_handle_t h_pd, + IN const uint8_t port_num, + IN const void *qp_uctx, + IN const ib_qp_create_t *p_create_attr, + OUT ib_qp_attr_t *p_qp_attr, + OUT ib_qp_handle_t *ph_qp, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + int err; + ib_api_status_t status; + struct ib_qp * p_ib_qp; + struct ib_qp_init_attr qp_init_attr; + struct ib_ucontext *p_uctx = NULL; + struct ib_pd *p_ib_pd = (struct ib_pd *)h_pd; + struct ib_device *p_ib_dev = p_ib_pd->device; + mlnx_hca_t *p_hca = ibdev2hca(p_ib_dev); + struct ibv_create_qp *p_req = NULL; + + HCA_ENTER(HCA_DBG_QP); + + if( p_umv_buf && p_umv_buf->command ) { + // sanity checks + if (p_umv_buf->input_size < sizeof(struct ibv_create_qp) || + p_umv_buf->output_size < sizeof(struct ibv_create_qp_resp) || + !p_umv_buf->p_inout_buf) { + status = IB_INVALID_PARAMETER; + goto err_inval_params; + } + p_req = (struct ibv_create_qp*)p_umv_buf->p_inout_buf; + p_uctx = p_ib_pd->p_uctx; + } + + // prepare the parameters + RtlZeroMemory(&qp_init_attr, sizeof(qp_init_attr)); + qp_init_attr.event_handler = qp_event_handler; + qp_init_attr.qp_context = p_hca; + qp_init_attr.send_cq = (struct ib_cq *)p_create_attr->h_sq_cq; + qp_init_attr.recv_cq = (struct ib_cq *)p_create_attr->h_rq_cq; + qp_init_attr.srq = (struct ib_srq *)p_create_attr->h_srq; + if( p_umv_buf && p_umv_buf->command ) { + qp_init_attr.cap.max_recv_sge = p_req->max_recv_sge; + qp_init_attr.cap.max_send_sge = p_req->max_send_sge; + qp_init_attr.cap.max_recv_wr = p_req->max_recv_wr; + qp_init_attr.cap.max_send_wr = p_req->max_send_wr; + qp_init_attr.cap.max_inline_data = p_req->max_inline_data; + } + else { + qp_init_attr.cap.max_recv_sge = p_create_attr->rq_sge; + qp_init_attr.cap.max_send_sge = p_create_attr->sq_sge; + qp_init_attr.cap.max_recv_wr = p_create_attr->rq_depth; + qp_init_attr.cap.max_send_wr = p_create_attr->sq_depth; + qp_init_attr.cap.max_inline_data = 0; /* absent in IBAL */ + } + qp_init_attr.sq_sig_type = (p_create_attr->sq_signaled) ? 
IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; + qp_init_attr.qp_type = to_qp_type(p_create_attr->qp_type); + qp_init_attr.port_num = port_num; + + // create qp + p_ib_qp = ibv_create_qp( p_ib_pd, &qp_init_attr, p_uctx, p_umv_buf ); + if (IS_ERR(p_ib_qp)) { + err = PTR_ERR(p_ib_qp); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_QP, + ("ibv_create_qp failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_create_qp; + } + + // fill the object + p_ib_qp->x.ctx = (void*)qp_uctx; + + // Query QP to obtain requested attributes + if (p_qp_attr) { + status = mlnx_query_qp((ib_qp_handle_t)p_ib_qp, p_qp_attr, p_umv_buf); + if (status != IB_SUCCESS) + goto err_query_qp; + } + + // return the results + if (ph_qp) *ph_qp = (ib_qp_handle_t)p_ib_qp; + + status = IB_SUCCESS; + goto end; + +err_query_qp: + ib_destroy_qp( p_ib_qp ); +err_create_qp: +err_inval_params: +end: + if (p_umv_buf && p_umv_buf->command) + p_umv_buf->status = status; + if (status != IB_SUCCESS) + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_QP, + ("completes with ERROR status %x\n", status)); + HCA_EXIT(HCA_DBG_QP); + return status; +} + +ib_api_status_t +mlnx_create_spl_qp ( + IN const ib_pd_handle_t h_pd, + IN const uint8_t port_num, + IN const void *qp_uctx, + IN const ib_qp_create_t *p_create_attr, + OUT ib_qp_attr_t *p_qp_attr, + OUT ib_qp_handle_t *ph_qp ) +{ + ib_api_status_t status; + + HCA_ENTER(HCA_DBG_SHIM); + + status = __create_qp( h_pd, port_num, + qp_uctx, p_create_attr, p_qp_attr, ph_qp, NULL ); + + if (status != IB_SUCCESS) + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_QP, + ("completes with ERROR status %x\n", status)); + HCA_EXIT(HCA_DBG_QP); + return status; +} + +ib_api_status_t +mlnx_create_qp ( + IN const ib_pd_handle_t h_pd, + IN const void *qp_uctx, + IN const ib_qp_create_t *p_create_attr, + OUT ib_qp_attr_t *p_qp_attr, + OUT ib_qp_handle_t *ph_qp, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + ib_api_status_t status; + + //NB: algorithm of mthca_alloc_sqp() requires port_num + // PRM states, that special pares are created in couples, so + // looks like we can put here port_num = 1 always + uint8_t port_num = 1; + + HCA_ENTER(HCA_DBG_QP); + + status = __create_qp( h_pd, port_num, + qp_uctx, p_create_attr, p_qp_attr, ph_qp, p_umv_buf ); + + if (status != IB_SUCCESS) + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_QP, + ("completes with ERROR status %x\n", status)); + HCA_EXIT(HCA_DBG_QP); + return status; +} + +ib_api_status_t +mlnx_modify_qp ( + IN const ib_qp_handle_t h_qp, + IN const ib_qp_mod_t *p_modify_attr, + OUT ib_qp_attr_t *p_qp_attr OPTIONAL, + IN OUT ci_umv_buf_t *p_umv_buf OPTIONAL ) +{ + int err; + ib_api_status_t status; + struct ib_qp_attr qp_attr; + int qp_attr_mask; + struct ib_qp *p_ib_qp = (struct ib_qp *)h_qp; + + HCA_ENTER(HCA_DBG_QP); + + // sanity checks + if( p_umv_buf && p_umv_buf->command ) { + // sanity checks + if (p_umv_buf->output_size < sizeof(struct ibv_modify_qp_resp) || + !p_umv_buf->p_inout_buf) { + status = IB_INVALID_PARAMETER; + goto err_inval_params; + } + } + + // fill parameters + status = to_qp_attr( p_ib_qp, from_qp_type(p_ib_qp->qp_type), + p_modify_attr, &qp_attr, &qp_attr_mask ); + if (status == IB_NOT_DONE) + goto query_qp; + if (status != IB_SUCCESS ) + goto err_mode_unsupported; + + // modify QP + err = p_ib_qp->device->modify_qp( p_ib_qp, &qp_attr, qp_attr_mask, NULL); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_QP, + ("ibv_modify_qp failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_modify_qp; + } + + // Query QP to obtain requested attributes +query_qp: + if (p_qp_attr) { 
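+		// p_qp_attr is filled here both after a successful modify and on the
+		// IB_NOT_DONE short-cut from to_qp_attr (no HW transition required)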
+ status = mlnx_query_qp ((ib_qp_handle_t)p_ib_qp, p_qp_attr, p_umv_buf); + if (status != IB_SUCCESS) + goto err_query_qp; + } + + if( p_umv_buf && p_umv_buf->command ) { + struct ibv_modify_qp_resp resp; + resp.attr_mask = qp_attr_mask; + resp.qp_state = qp_attr.qp_state; + err = to_umv_buf(p_umv_buf, &resp, sizeof(struct ibv_modify_qp_resp)); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("to_umv_buf failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_copy; + } + } + + status = IB_SUCCESS; + +err_copy: +err_query_qp: +err_modify_qp: +err_mode_unsupported: +err_inval_params: + if (p_umv_buf && p_umv_buf->command) + p_umv_buf->status = status; + if (status != IB_SUCCESS) { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_QP, + ("completes with ERROR status %x\n", status)); + } + HCA_EXIT(HCA_DBG_QP); + return status; +} + +ib_api_status_t +mlnx_ndi_modify_qp ( + IN const ib_qp_handle_t h_qp, + IN const ib_qp_mod_t *p_modify_attr, + OUT ib_qp_attr_t *p_qp_attr OPTIONAL, + IN const uint32_t buf_size, + IN uint8_t* const p_outbuf) +{ + ci_umv_buf_t umv_buf; + ib_api_status_t status; + struct ibv_modify_qp_resp resp; + void *buf = &resp; + + HCA_ENTER(HCA_DBG_QP); + + /* imitate umv_buf */ + umv_buf.command = TRUE; /* special case for NDI. Usually it's TRUE */ + umv_buf.input_size = 0; + umv_buf.output_size = sizeof(struct ibv_modify_qp_resp); + umv_buf.p_inout_buf = buf; + + status = mlnx_modify_qp ( h_qp, p_modify_attr, p_qp_attr, &umv_buf ); + + if (status == IB_SUCCESS) { + cl_memclr( p_outbuf, buf_size ); + *p_outbuf = resp.qp_state; + } + + HCA_EXIT(HCA_DBG_QP); + return status; +} + +ib_api_status_t +mlnx_destroy_qp ( + IN const ib_qp_handle_t h_qp, + IN const uint64_t timewait ) +{ + ib_api_status_t status; + int err; + struct ib_qp *p_ib_qp = (struct ib_qp *)h_qp; + + UNUSED_PARAM( timewait ); + + HCA_ENTER( HCA_DBG_QP); + + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM , + ("qpnum %#x, pcs %p\n", p_ib_qp->qp_num, PsGetCurrentProcess()) ); + + err = ib_destroy_qp( p_ib_qp ); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP, + ("ibv_destroy_qp failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_destroy_qp; + } + + status = IB_SUCCESS; + +err_destroy_qp: + if (status != IB_SUCCESS) + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_QP, + ("completes with ERROR status %x\n", status)); + HCA_EXIT(HCA_DBG_QP); + return status; +} + +void +mlnx_qp_if( + IN OUT ci_interface_t *p_interface ) +{ + p_interface->create_qp = mlnx_create_qp; + p_interface->create_spl_qp = mlnx_create_spl_qp; + p_interface->modify_qp = mlnx_modify_qp; + p_interface->ndi_modify_qp = mlnx_ndi_modify_qp; + p_interface->query_qp = mlnx_query_qp; + p_interface->destroy_qp = mlnx_destroy_qp; +} + diff --git a/branches/ConnectX/hw/mlx4/kernel/hca/srq.c b/branches/ConnectX/hw/mlx4/kernel/hca/srq.c new file mode 100644 index 00000000..c3aaea55 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/hca/srq.c @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: al.c 1611 2006-08-20 14:48:55Z sleybo $ + */ + +#include "precomp.h" + +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "srq.tmh" +#endif + + +ib_api_status_t +mlnx_create_srq ( + IN const ib_pd_handle_t h_pd, + IN const void *srq_context, + IN const ib_srq_attr_t * const p_srq_attr, + OUT ib_srq_handle_t *ph_srq, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + int err; + ib_api_status_t status; + struct ib_srq *p_ib_srq; + struct ib_srq_init_attr srq_init_attr; + struct ib_ucontext *p_uctx = NULL; + struct ib_pd *p_ib_pd = (struct ib_pd *)h_pd; + struct ib_device *p_ib_dev = p_ib_pd->device; + mlnx_hca_t *p_hca = ibdev2hca(p_ib_dev); + + HCA_ENTER(HCA_DBG_SRQ); + + if( p_umv_buf && p_umv_buf->command) { + + // sanity checks + if (p_umv_buf->input_size < sizeof(struct ibv_create_srq) || + p_umv_buf->output_size < sizeof(struct ibv_create_srq_resp) || + !p_umv_buf->p_inout_buf) { + status = IB_INVALID_PARAMETER; + goto err_inval_params; + } + p_uctx = p_ib_pd->p_uctx; + } + + // prepare the parameters + RtlZeroMemory(&srq_init_attr, sizeof(srq_init_attr)); + srq_init_attr.event_handler = srq_event_handler; + srq_init_attr.srq_context = p_hca; + srq_init_attr.attr.max_wr = p_srq_attr->max_wr; + srq_init_attr.attr.max_sge = p_srq_attr->max_sge; + srq_init_attr.attr.srq_limit = p_srq_attr->srq_limit; + + // allocate srq + p_ib_srq = ibv_create_srq(p_ib_pd, &srq_init_attr, p_uctx, p_umv_buf ); + if (IS_ERR(p_ib_srq)) { + err = PTR_ERR(p_ib_srq); + HCA_PRINT (TRACE_LEVEL_ERROR ,HCA_DBG_SRQ, ("ibv_create_srq failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_create_srq; + } + p_ib_srq->x.ctx = (void*)srq_context; + + // return the result + if (ph_srq) *ph_srq = (ib_srq_handle_t)p_ib_srq; + + status = IB_SUCCESS; + +err_create_srq: +err_inval_params: + if (p_umv_buf && p_umv_buf->command) + p_umv_buf->status = status; + if (status != IB_SUCCESS) + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SRQ, + ("completes with ERROR status %x\n", status)); + HCA_EXIT(HCA_DBG_SRQ); + return status; +} + + +ib_api_status_t +mlnx_modify_srq ( + IN const ib_srq_handle_t h_srq, + IN const ib_srq_attr_t* const p_srq_attr, + IN const ib_srq_attr_mask_t srq_attr_mask, + IN OUT ci_umv_buf_t *p_umv_buf OPTIONAL ) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct ib_srq *p_ib_srq = (struct ib_srq *)h_srq; + UNUSED_PARAM(p_umv_buf); + + HCA_ENTER(HCA_DBG_SRQ); + + err = p_ib_srq->device->modify_srq(p_ib_srq, (void*)p_srq_attr, srq_attr_mask, NULL); + status = errno_to_iberr(err); + + if (status != IB_SUCCESS) + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SRQ, + ("completes with ERROR status %x\n", status)); + HCA_EXIT(HCA_DBG_SRQ); + return status; +} + +ib_api_status_t +mlnx_query_srq ( + IN const ib_srq_handle_t h_srq, + OUT ib_srq_attr_t* const p_srq_attr, + IN OUT 
ci_umv_buf_t *p_umv_buf OPTIONAL ) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct ib_srq *p_ib_srq = (struct ib_srq *)h_srq; + UNUSED_PARAM(p_umv_buf); + + HCA_ENTER(HCA_DBG_SRQ); + + err = p_ib_srq->device->query_srq(p_ib_srq, (void*)p_srq_attr); + status = errno_to_iberr(err); + + if (status != IB_SUCCESS) + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SRQ, + ("completes with ERROR status %x\n", status)); + HCA_EXIT(HCA_DBG_SRQ); + return status; +} + +ib_api_status_t +mlnx_destroy_srq ( + IN const ib_srq_handle_t h_srq ) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct ib_srq *p_ib_srq = (struct ib_srq *)h_srq; + + HCA_ENTER(HCA_DBG_SRQ); + + err = ib_destroy_srq(p_ib_srq); + status = errno_to_iberr(err); + + if (status != IB_SUCCESS) + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SRQ, + ("completes with ERROR status %x\n", status)); + HCA_EXIT(HCA_DBG_SRQ); + return status; +} + +void +mlnx_srq_if( + IN OUT ci_interface_t *p_interface ) +{ + p_interface->create_srq = mlnx_create_srq; + p_interface->modify_srq = mlnx_modify_srq; + p_interface->query_srq = mlnx_query_srq; + p_interface->destroy_srq = mlnx_destroy_srq; +} + diff --git a/branches/ConnectX/hw/mlx4/kernel/hca/verbs.c b/branches/ConnectX/hw/mlx4/kernel/hca/verbs.c new file mode 100644 index 00000000..82ff8e1d --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/hca/verbs.c @@ -0,0 +1,673 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 2004-2005 Mellanox Technologies, Inc. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: hca_verbs.c 2073 2007-11-13 11:38:40Z leonid $ + */ + + +#include "precomp.h" + +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "verbs.tmh" +#endif + + +/* Memory regions */ + +struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, enum ib_access_flags mr_access_flags) +{ + struct ib_mr *mr; + + mr = pd->device->get_dma_mr(pd, mr_access_flags); + + if (!IS_ERR(mr)) { + mr->device = pd->device; + mr->pd = pd; + mr->p_uctx = pd->p_uctx; + atomic_inc(&pd->usecnt); + atomic_set(&mr->usecnt, 0); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_MEMORY ,("pdn %d, usecnt %d \n", + ((struct mlx4_ib_pd*)pd)->pdn, pd->usecnt)); + } + + return mr; +} + +struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + enum ib_access_flags mr_access_flags, + u64 *iova_start) +{ + struct ib_mr *mr; + + if ( pd->device->reg_phys_mr ) + mr = pd->device->reg_phys_mr(pd, phys_buf_array, num_phys_buf, + mr_access_flags, iova_start); + else + mr = ERR_PTR(-ENOSYS); + + if (!IS_ERR(mr)) { + mr->device = pd->device; + mr->pd = pd; + mr->p_uctx = pd->p_uctx; + atomic_inc(&pd->usecnt); + atomic_set(&mr->usecnt, 0); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_MEMORY ,("PD%d use cnt %d \n", + ((struct mlx4_ib_pd*)pd)->pdn, pd->usecnt)); + } + + return mr; +} + + + struct ib_mr *ibv_reg_mr(struct ib_pd *pd, + u64 start, u64 length, + u64 virt_addr, + int mr_access_flags, + ci_umv_buf_t* const p_umv_buf ) +{ + struct ib_mr *ib_mr; + int err; + HCA_ENTER(HCA_DBG_MEMORY); + + if (p_umv_buf && p_umv_buf->command) { + err = -ENOSYS; + goto err_not_supported; + } + + ib_mr = pd->device->reg_user_mr(pd, start, length, virt_addr, mr_access_flags, NULL); + if (IS_ERR(ib_mr)) { + err = PTR_ERR(ib_mr); + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_MEMORY ,("mthca_reg_user_mr failed (%d)\n", err)); + goto err_reg_user_mr; + } + + ib_mr->device = pd->device; + ib_mr->pd = pd; + atomic_inc(&pd->usecnt); + atomic_set(&ib_mr->usecnt, 0); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_MEMORY ,("PD%d use cnt %d, pd_handle %p, ctx %p \n", + ((struct mlx4_ib_pd*)pd)->pdn, pd->usecnt, pd, pd->p_uctx)); + HCA_EXIT(HCA_DBG_MEMORY); + return ib_mr; + +err_reg_user_mr: +err_not_supported: + HCA_EXIT(HCA_DBG_MEMORY); + return ERR_PTR(err); +} + +int ib_dereg_mr(struct ib_mr *mr) +{ + int ret; + struct ib_pd *pd; + struct ib_device *p_ibdev; + + if (atomic_read(&mr->usecnt)) + return -EBUSY; + + p_ibdev = mr->device; + pd = mr->pd; + ret = p_ibdev->dereg_mr(mr); + if (!ret) { + atomic_dec(&pd->usecnt); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_MEMORY ,("pdn %d, usecnt %d, pd_handle %p, ctx %p \n", + ((struct mlx4_ib_pd*)pd)->pdn, pd->usecnt, pd, pd->p_uctx)); + } + + return ret; +} + +static void release_user_cq_qp_resources( + struct ib_ucontext *p_uctx) +{ + if (p_uctx) { + atomic_dec(&p_uctx->x.usecnt); + if (!atomic_read(&p_uctx->x.usecnt) && p_uctx->closing) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("User resources are released. 
Removing context\n")); + ibv_um_close(p_uctx); + } + } +} + +// +// Completion queues +// + +struct ib_cq *ibv_create_cq(struct ib_device *p_ibdev, + ib_comp_handler comp_handler, + void (*event_handler)(struct ib_event *, void *), + void *cq_context, int cqe, + struct ib_ucontext *p_uctx, ci_umv_buf_t* const p_umv_buf) +{ + int err; + struct ib_cq *cq; + struct ib_udata udata, *p_udata = &udata; + struct ibv_create_cq *p_req; + struct ibv_create_cq_resp *p_resp = NULL; + + if ( p_umv_buf && p_umv_buf->command) { + // prepare user parameters + p_req = (struct ibv_create_cq*)p_umv_buf->p_inout_buf; + p_resp = (struct ibv_create_cq_resp*)p_umv_buf->p_inout_buf; + INIT_UDATA(&udata, &p_req->buf_addr, &p_resp->cqn, + sizeof(struct mlx4_ib_create_cq), sizeof(struct mlx4_ib_create_cq_resp)); + } + else + p_udata = NULL; + + // create cq + cq = p_ibdev->create_cq(p_ibdev, cqe, 0, p_uctx, p_udata); + if (IS_ERR(cq)) { + err = PTR_ERR(cq); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_CQ ,("create_cq failed (%d)\n", err)); + goto err_create_cq; + } + + cq->device = p_ibdev; + cq->p_uctx = p_uctx; + cq->comp_handler = comp_handler; + cq->event_handler = event_handler; + cq->cq_context = cq_context; + atomic_set(&cq->usecnt, 0); + if (p_uctx) + atomic_inc(&p_uctx->x.usecnt); + + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_CQ , + ("created CQ: cqn %#x:%#x \n", ((struct mlx4_ib_cq*)cq)->mcq.cqn, cq->cqe )); + + // fill results + if (p_umv_buf) { + p_resp->cq_handle = (u64)(ULONG_PTR)cq; + p_resp->cqe = cq->cqe; + p_umv_buf->output_size = sizeof(struct ibv_create_cq_resp); + } + + return cq; + +err_create_cq: + if( p_umv_buf && p_umv_buf->command ) + p_umv_buf->status = IB_ERROR; + return ERR_PTR(err); +} + +int ib_destroy_cq(struct ib_cq *cq) +{ + int ret; + struct ib_ucontext *p_uctx = cq->p_uctx; + + if (atomic_read(&cq->usecnt)) + return -EBUSY; + + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_CQ , + ("destroying CQ: cqn %#x:%#x \n", ((struct mlx4_ib_cq*)cq)->mcq.cqn, cq->cqe )); + + ret = cq->device->destroy_cq(cq); + release_user_cq_qp_resources(p_uctx); + return ret; +} + +// +// Queue pairs +// + +#if DBG +static char *__print_qtype(enum ib_qp_type qtype) +{ + char *str = NULL; + switch (qtype) { + case IB_QPT_SMI: str = "SMI"; break; + case IB_QPT_GSI: str = "GSI"; break; + case IB_QPT_RC: str = "RC"; break; + case IB_QPT_UC: str = "UC"; break; + case IB_QPT_UD: str = "UD"; break; + case IB_QPT_RAW_IP_V6: str = "IP_V6"; break; + case IB_QPT_RAW_ETY: str = "ETY"; break; + default: str = "UKNWN"; break; + } + return str; +} +#endif + +struct ib_qp *ibv_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + struct ib_ucontext *context, ci_umv_buf_t* const p_umv_buf) +{ + int err; + struct ib_qp *p_ib_qp; + struct ib_udata udata, *p_udata = &udata; + struct ibv_create_qp *p_req = NULL; + struct ibv_create_qp_resp *p_resp= NULL; + + HCA_ENTER(HCA_DBG_QP); + + if ( p_umv_buf ) { + // prepare user parameters + p_req = (struct ibv_create_qp*)p_umv_buf->p_inout_buf; + p_resp = (struct ibv_create_qp_resp*)p_umv_buf->p_inout_buf; + INIT_UDATA(&udata, &p_req->buf_addr, NULL, + sizeof(struct mlx4_ib_create_qp), 0); + } + else + p_udata = NULL; + + p_ib_qp = pd->device->create_qp( pd, qp_init_attr, p_udata ); + + if (IS_ERR(p_ib_qp)) { + err = PTR_ERR(p_ib_qp); + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_QP ,("create_qp failed (%d)\n", err)); + goto err_create_qp; + } + + // fill results + p_ib_qp->device = pd->device; + p_ib_qp->pd = pd; + p_ib_qp->send_cq = qp_init_attr->send_cq; + p_ib_qp->recv_cq = 
qp_init_attr->recv_cq; + p_ib_qp->srq = qp_init_attr->srq; + p_ib_qp->p_uctx = context; + p_ib_qp->event_handler = qp_init_attr->event_handler; + p_ib_qp->qp_context = qp_init_attr->qp_context; + p_ib_qp->qp_type = qp_init_attr->qp_type; + atomic_inc(&pd->usecnt); + atomic_inc(&qp_init_attr->send_cq->usecnt); + atomic_inc(&qp_init_attr->recv_cq->usecnt); + if (qp_init_attr->srq) + atomic_inc(&qp_init_attr->srq->usecnt); + if (context) + atomic_inc(&context->x.usecnt); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_QP ,("pdn %d, usecnt %d, pd_handle %p, ctx %p \n", + ((struct mlx4_ib_pd*)pd)->pdn, pd->usecnt, pd, pd->p_uctx)); + + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_QP , + ("qtype %s (%d), qnum %#x, q_num %#x, ssz %d, rsz %d, scq %#x:%#x, rcq %#x:%#x, port_num %d \n", + __print_qtype(p_ib_qp->qp_type), p_ib_qp->qp_type, + ((struct mlx4_ib_qp*)p_ib_qp)->mqp.qpn, p_ib_qp->qp_num, + qp_init_attr->cap.max_send_wr, qp_init_attr->cap.max_recv_wr, + ((struct mlx4_ib_cq*)p_ib_qp->send_cq)->mcq.cqn, p_ib_qp->send_cq->cqe, + ((struct mlx4_ib_cq*)p_ib_qp->recv_cq)->mcq.cqn, p_ib_qp->recv_cq->cqe, + qp_init_attr->port_num + ) ); + + // fill results for user + if (context && p_umv_buf && p_umv_buf->p_inout_buf) { + struct mlx4_ib_qp *p_mib_qp = (struct mlx4_ib_qp *)p_ib_qp; + p_resp->qp_handle = (__u64)(ULONG_PTR)p_ib_qp; + p_resp->qpn = p_mib_qp->mqp.qpn; + p_resp->max_send_wr = p_mib_qp->sq.max_post; + p_resp->max_recv_wr = p_mib_qp->rq.max_post; + p_resp->max_send_sge = p_mib_qp->sq.max_gs; + p_resp->max_recv_sge = p_mib_qp->rq.max_gs; + /* + * We don't support inline sends for kernel QPs (yet), and we + * don't know what userspace's value should be. + */ + p_resp->max_inline_data = 0; + p_umv_buf->output_size = sizeof(struct ibv_create_qp_resp); + } + + return p_ib_qp; + +err_create_qp: + if( p_umv_buf && p_umv_buf->command ) + p_umv_buf->status = IB_ERROR; + HCA_EXIT(HCA_DBG_QP); + return ERR_PTR(err); +} + +int ib_destroy_qp(struct ib_qp *qp) +{ + struct ib_pd *p_ib_pd; + struct ib_cq *scq, *rcq; + struct ib_srq *srq; + struct ib_ucontext *p_uctx; + int ret; + + p_ib_pd = qp->pd; + scq = qp->send_cq; + rcq = qp->recv_cq; + srq = qp->srq; + p_uctx = p_ib_pd->p_uctx; + + ret = qp->device->destroy_qp(qp); + if (!ret) { + atomic_dec(&p_ib_pd->usecnt); + atomic_dec(&scq->usecnt); + atomic_dec(&rcq->usecnt); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_QP ,("PD%d use cnt %d, pd_handle %p, ctx %p \n", + ((struct mlx4_ib_pd*)p_ib_pd)->pdn, p_ib_pd->usecnt, p_ib_pd, p_ib_pd->p_uctx)); + if (srq) + atomic_dec(&srq->usecnt); + release_user_cq_qp_resources(p_uctx); + } + + return ret; +} + +// +// Shared receive queues +// + + +/* Shared receive queues */ + +struct ib_srq *ibv_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_ucontext *context, ci_umv_buf_t* const p_umv_buf) +{ + int err; + struct ib_srq *p_ib_srq; + struct ib_udata udata, *p_udata = &udata; + struct ibv_create_srq *p_req = NULL; + struct ibv_create_srq_resp *p_resp= NULL; + + if ( p_umv_buf && p_umv_buf->command) { + // prepare user parameters + p_req = (struct ibv_create_srq*)p_umv_buf->p_inout_buf; + p_resp = (struct ibv_create_srq_resp*)p_umv_buf->p_inout_buf; + INIT_UDATA(&udata, &p_req->buf_addr, &p_resp->srqn, + sizeof(struct ibv_create_srq), sizeof(struct ibv_create_srq_resp)); + } + else + p_udata = NULL; + + p_ib_srq = pd->device->create_srq( pd, srq_init_attr, p_udata ); + if (IS_ERR(p_ib_srq)) { + err = PTR_ERR(p_ib_srq); + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_QP ,("create_srq failed (%d)\n", 
err)); + goto err_create_srq; + } + + // fill results + p_ib_srq->device = pd->device; + p_ib_srq->pd = pd; + p_ib_srq->p_uctx = context; + p_ib_srq->event_handler = srq_init_attr->event_handler; + p_ib_srq->srq_context = srq_init_attr->srq_context; + atomic_inc(&pd->usecnt); + atomic_set(&p_ib_srq->usecnt, 0); + if (context) + atomic_inc(&context->x.usecnt); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_QP ,("PD%d use cnt %d, pd_handle %p, ctx %p \n", + ((struct mlx4_ib_pd*)pd)->pdn, pd->usecnt, pd, pd->p_uctx)); + + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SRQ , + ("uctx %p, qhndl %p, qnum %#x \n", + pd->p_uctx, p_ib_srq, ((struct mlx4_ib_srq*)p_ib_srq)->msrq.srqn ) ); + + // fill results for user + if (context && p_umv_buf && p_umv_buf->p_inout_buf) { + struct mlx4_ib_srq* p_mib_srq = (struct mlx4_ib_srq*)p_ib_srq; + p_resp->srq_handle = (__u64)(ULONG_PTR)p_ib_srq; + p_resp->max_wr = p_mib_srq->msrq.max - 1; + p_resp->max_sge = p_mib_srq->msrq.max_gs; + p_umv_buf->output_size = sizeof(struct ibv_create_srq_resp); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_QP ,("PD%d use cnt %d \n", + ((struct mlx4_ib_pd*)pd)->pdn, pd->usecnt)); + } + + return p_ib_srq; + +err_create_srq: + if( p_umv_buf && p_umv_buf->command ) + p_umv_buf->status = IB_ERROR; + HCA_EXIT(HCA_DBG_QP); + return ERR_PTR(err); +} + +int ib_destroy_srq(struct ib_srq *srq) +{ + int ret; + struct ib_pd *p_ib_pd = srq->pd; + struct ib_ucontext *p_uctx = p_ib_pd->p_uctx; + + ret = srq->device->destroy_srq(srq); + if (!ret) { + atomic_dec(&p_ib_pd->usecnt); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SRQ ,("PD%d use cnt %d, pd_handle %p, ctx %p \n", + ((struct mlx4_ib_pd*)p_ib_pd)->pdn, p_ib_pd->usecnt, p_ib_pd, p_ib_pd->p_uctx)); + release_user_cq_qp_resources(p_uctx); + } + + return ret; +} + +// +// User context +// +static NTSTATUS __map_memory_for_user( + IN io_addr_t addr, + IN SIZE_T size, + IN MEMORY_CACHING_TYPE mem_type, + OUT umap_t * p_map + ) +{ + NTSTATUS status; + + HCA_ENTER(HCA_DBG_SHIM); + + p_map->mapped = 0; + + // map UAR to kernel + p_map->kva = ioremap(addr, size); + if (!p_map->kva) { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_LOW , + ("Couldn't map kernel access region, aborting.\n") ); + status = IB_INSUFFICIENT_MEMORY; + goto err_ioremap; + } + + // build MDL + p_map->mdl = IoAllocateMdl( p_map->kva, (ULONG)size, + FALSE, TRUE, NULL ); + if( !p_map->mdl ) { + status = IB_INSUFFICIENT_MEMORY; + goto err_alloc_mdl; + } + MmBuildMdlForNonPagedPool( p_map->mdl ); + + /* Map the memory into the calling process's address space. 
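+	 * MmMapLockedPagesSpecifyCache raises an exception (rather than returning
+	 * NULL) when a user-mode mapping fails, hence the __try/__except below.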
*/ + __try { + p_map->uva = MmMapLockedPagesSpecifyCache( p_map->mdl, + UserMode, mem_type, NULL, FALSE, NormalPagePriority ); + } + __except(EXCEPTION_EXECUTE_HANDLER) { + status = IB_INVALID_PERMISSION; + goto err_map; + } + + p_map->mapped = 1; + status = STATUS_SUCCESS; + goto done; + +err_map: + IoFreeMdl(p_map->mdl); + +err_alloc_mdl: + iounmap(p_map->kva, PAGE_SIZE); + +err_ioremap: +done: + HCA_EXIT(HCA_DBG_SHIM); + return status; +} + +static void __unmap_memory_for_user( + IN umap_t * p_map + ) +{ + if (p_map->mapped) { + p_map->mapped = 0; + MmUnmapLockedPages( p_map->uva, p_map->mdl ); + IoFreeMdl(p_map->mdl); + iounmap(p_map->kva, PAGE_SIZE); + } +} + +ib_api_status_t ibv_um_open( + IN struct ib_device * p_ibdev, + IN OUT ci_umv_buf_t* const p_umv_buf, + OUT struct ib_ucontext ** pp_uctx ) +{ + int err; + ib_api_status_t status; + struct mlx4_ib_ucontext *p_muctx; + struct ibv_get_context_resp *p_uresp; + struct mlx4_ib_alloc_ucontext_resp ib_alloc_ucontext_resp; + struct ib_ucontext *p_uctx; + struct ib_udata udata; + + HCA_ENTER(HCA_DBG_SHIM); + + // create user context in kernel + INIT_UDATA(&udata, NULL, &ib_alloc_ucontext_resp, + 0, sizeof(struct mlx4_ib_alloc_ucontext_resp)); + + p_uctx = p_ibdev->alloc_ucontext(p_ibdev, &udata); + if (IS_ERR(p_uctx)) { + err = PTR_ERR(p_uctx); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM, + ("mthca_alloc_ucontext failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_alloc_ucontext; + } + p_muctx = to_mucontext(p_uctx); + p_uresp = (struct ibv_get_context_resp *)(void*)p_umv_buf->p_inout_buf; + + // fill the rest of ib_ucontext fields + p_uctx->device = p_ibdev; + p_uctx->closing = 0; + + // livefish + if (hca_is_livefish(p_ibdev->x.p_fdo)) + goto done; + + // map uar to user space + status = __map_memory_for_user( + (io_addr_t)p_muctx->uar.pfn << PAGE_SHIFT, + PAGE_SIZE, MmNonCached, &p_uctx->x.uar ); + if( !NT_SUCCESS(status) ) { + goto err_map_uar; + } + p_uresp->uar_addr = (u64)(ULONG_PTR)p_uctx->x.uar.uva; + + // map BF to user space + if (ib_alloc_ucontext_resp.bf_reg_size) { + status = __map_memory_for_user( + (io_addr_t)(p_muctx->uar.pfn + + to_mdev(p_ibdev)->dev->caps.num_uars) << PAGE_SHIFT, + PAGE_SIZE, MmWriteCombined, &p_uctx->x.bf ); + if( !NT_SUCCESS(status) ) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_SHIM, + ("BlueFlame available, but failed to be mapped (%#x)\n", status)); + p_uresp->bf_page = 0; + p_uresp->bf_buf_size = 0; + } + else { + p_uresp->bf_page = (u64)(ULONG_PTR)p_uctx->x.bf.uva; + p_uresp->bf_buf_size = ib_alloc_ucontext_resp.bf_reg_size / 2; + p_uresp->bf_offset = 0; + } + } + else { + p_uresp->bf_page = 0; + p_uresp->bf_buf_size = 0; + } + +done: + // fill the response + p_uresp->bf_reg_size = ib_alloc_ucontext_resp.bf_reg_size; + p_uresp->bf_regs_per_page = ib_alloc_ucontext_resp.bf_regs_per_page; + p_uresp->qp_tab_size = ib_alloc_ucontext_resp.qp_tab_size; + + *pp_uctx = p_uctx; + status = IB_SUCCESS; + goto end; + +err_map_uar: + p_ibdev->dealloc_ucontext(p_uctx); +err_alloc_ucontext: +end: + HCA_EXIT(HCA_DBG_SHIM); + return status; +} + + +void ibv_um_close( struct ib_ucontext * h_um_ca ) +{ + int err; + ib_api_status_t status; + struct ib_ucontext *p_uctx = (struct ib_ucontext *)h_um_ca; + PFDO_DEVICE_DATA p_fdo = p_uctx->device->x.p_fdo; + + HCA_ENTER(HCA_DBG_SHIM); + + p_uctx->closing = 1; + + if (atomic_read(&p_uctx->x.usecnt)) { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SHIM, + ("resources are not released (cnt %d)\n", p_uctx->x.usecnt)); + status = IB_RESOURCE_BUSY; + goto err_usage; + } + 
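+	// no CQs/QPs/SRQs reference this context any more, so it is safe to
+	// unmap the UAR/BlueFlame pages and release the kernel ucontext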
+ if ( !hca_is_livefish(p_fdo)) { + __unmap_memory_for_user( &p_uctx->x.bf ); + __unmap_memory_for_user( &p_uctx->x.uar ); + } + + err = p_fdo->bus_ib_ifc.p_ibdev->dealloc_ucontext(p_uctx); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SHIM, + ("mthca_dealloc_ucontext failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_dealloc_ucontext; + } + + HCA_PRINT(TRACE_LEVEL_INFORMATION,HCA_DBG_SHIM, + ("pcs %p\n", PsGetCurrentProcess()) ); + status = IB_SUCCESS; + goto end; + +err_dealloc_ucontext: +err_usage: +end: + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SHIM, + ("completes with ERROR status %x\n", status)); + } + HCA_EXIT(HCA_DBG_SHIM); + return; +} + diff --git a/branches/ConnectX/hw/mlx4/kernel/hca/verbs.h b/branches/ConnectX/hw/mlx4/kernel/hca/verbs.h new file mode 100644 index 00000000..ec10f2d9 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/hca/verbs.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: ib_verbs.h 1889 2006-12-31 08:33:06Z sleybo $ + */ + +#pragma once + +#include "ib_verbs.h" + +struct ib_mr *ibv_reg_mr(struct ib_pd *pd, + u64 start, u64 length, + u64 virt_addr, + int mr_access_flags, + ci_umv_buf_t* const p_umv_buf ); + +struct ib_cq *ibv_create_cq(struct ib_device *p_ibdev, + ib_comp_handler comp_handler, + void (*event_handler)(struct ib_event *, void *), + void *cq_context, int cqe, + struct ib_ucontext *p_uctx, ci_umv_buf_t* const p_umv_buf); + +struct ib_qp *ibv_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + struct ib_ucontext *context, ci_umv_buf_t* const p_umv_buf); + +struct ib_srq *ibv_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_ucontext *context, ci_umv_buf_t* const p_umv_buf); + +ib_api_status_t ibv_um_open( + IN struct ib_device * p_ibdev, + IN OUT ci_umv_buf_t* const p_umv_buf, + OUT struct ib_ucontext ** pp_uctx ); + +void ibv_um_close( struct ib_ucontext * h_um_ca ); + + diff --git a/branches/ConnectX/hw/mlx4/kernel/hca/vp.c b/branches/ConnectX/hw/mlx4/kernel/hca/vp.c new file mode 100644 index 00000000..f00f814f --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/hca/vp.c @@ -0,0 +1,325 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 2004-2005 Mellanox Technologies, Inc. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: hca_verbs.c 2073 2007-11-13 11:38:40Z leonid $ + */ + +#include "precomp.h" + +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "vp.tmh" +#endif + +static ib_api_status_t +mlnx_um_open( + IN const ib_ca_handle_t h_ca, + IN OUT ci_umv_buf_t* const p_umv_buf, + OUT ib_ca_handle_t* const ph_um_ca ) +{ + ib_api_status_t status; + mlnx_hca_t *p_hca = (mlnx_hca_t *)h_ca; + PFDO_DEVICE_DATA p_fdo = hca2fdo(p_hca); + struct ib_device *p_ibdev = hca2ibdev(p_hca); + struct ib_ucontext *p_uctx; + struct ibv_get_context_resp *p_uresp; + + HCA_ENTER(HCA_DBG_SHIM); + + // sanity check + ASSERT( p_umv_buf ); + if( !p_umv_buf->command ) + { // no User Verb Provider + p_uctx = cl_zalloc( sizeof(struct ib_ucontext) ); + if( !p_uctx ) + { + status = IB_INSUFFICIENT_MEMORY; + goto err_alloc_ucontext; + } + /* Copy the dev info. 
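+		 * No user-mode verbs provider is attached, so a bare kernel ucontext
+		 * is enough; UAR mapping and the user response buffer are skipped.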
*/ + p_uctx->device = p_ibdev; + p_umv_buf->output_size = 0; + status = IB_SUCCESS; + goto done; + } + + // sanity check + if ( p_umv_buf->output_size < sizeof(struct ibv_get_context_resp) || + !p_umv_buf->p_inout_buf) { + status = IB_INVALID_PARAMETER; + goto err_inval_params; + } + + status = ibv_um_open( p_ibdev, p_umv_buf, &p_uctx ); + if (!NT_SUCCESS(status)) { + goto end; + } + + // fill more parameters for user (sanity checks are in mthca_alloc_ucontext) + p_uresp = (struct ibv_get_context_resp *)(void*)p_umv_buf->p_inout_buf; + p_uresp->vend_id = (uint32_t)p_fdo->bus_ib_ifc.pdev->ven_id; + p_uresp->dev_id = (uint16_t)p_fdo->bus_ib_ifc.pdev->dev_id; + p_uresp->max_qp_wr = hca2mdev(p_hca)->caps.max_wqes; + p_uresp->max_cqe = hca2mdev(p_hca)->caps.max_cqes; + p_uresp->max_sge = min( hca2mdev(p_hca)->caps.max_sq_sg, + hca2mdev(p_hca)->caps.max_rq_sg ); + +done: + // fill the rest of ib_ucontext_ex fields + atomic_set(&p_uctx->x.usecnt, 0); + p_uctx->x.va = p_uctx->x.p_mdl = NULL; + p_uctx->x.fw_if_open = FALSE; + mutex_init( &p_uctx->x.mutex ); + + // chain user context to the device + spin_lock( &p_fdo->uctx_lock ); + cl_qlist_insert_tail( &p_fdo->uctx_list, &p_uctx->x.list_item ); + cl_atomic_inc(&p_fdo->usecnt); + spin_unlock( &p_fdo->uctx_lock ); + + // return the result + if (ph_um_ca) *ph_um_ca = (ib_ca_handle_t)p_uctx; + + status = IB_SUCCESS; + goto end; + +err_inval_params: +err_alloc_ucontext: +end: + if (p_umv_buf && p_umv_buf->command) + p_umv_buf->status = status; + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SHIM, + ("completes with ERROR status %x\n", status)); + } + HCA_EXIT(HCA_DBG_SHIM); + return status; +} + + +static void +mlnx_um_close( + IN ib_ca_handle_t h_ca, + IN ib_ca_handle_t h_um_ca ) +{ + struct ib_ucontext *p_uctx = (struct ib_ucontext *)h_um_ca; + PFDO_DEVICE_DATA p_fdo = p_uctx->device->x.p_fdo; + + UNUSED_PARAM(h_ca); + + if ( !hca_is_livefish(p_fdo)) + unmap_crspace_for_all(p_uctx); + spin_lock( &p_fdo->uctx_lock ); + cl_qlist_remove_item( &p_fdo->uctx_list, &p_uctx->x.list_item ); + cl_atomic_dec(&p_fdo->usecnt); + spin_unlock( &p_fdo->uctx_lock ); + if( !p_uctx->x.uar.kva) + cl_free( h_um_ca ); // no User Verb Provider + else + ibv_um_close(p_uctx); +#if 0 + // TODO: replace where pa_cash.c is found + pa_cash_print(); +#endif + return; +} + + +ib_api_status_t +mlnx_local_mad ( + IN const ib_ca_handle_t h_ca, + IN const uint8_t port_num, + IN const ib_av_attr_t* p_av_attr, + IN const ib_mad_t *p_mad_in, + OUT ib_mad_t *p_mad_out ) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + mlnx_hca_t *p_hca = (mlnx_hca_t *)h_ca; + PFDO_DEVICE_DATA p_fdo = hca2fdo(p_hca); + struct ib_device *p_ibdev = p_fdo->bus_ib_ifc.p_ibdev; + //TODO: do we need use flags (IB_MAD_IGNORE_MKEY, IB_MAD_IGNORE_BKEY) ? + int mad_flags = 0; + //TODO: do we need use grh ? + struct ib_grh *p_grh = NULL; + ib_wc_t *p_wc = NULL; + + HCA_ENTER(HCA_DBG_MAD); + + // sanity checks + if (port_num > 2) { + status = IB_INVALID_PARAMETER; + goto err_port_num; + } + + if (p_av_attr){ + p_wc = cl_zalloc(sizeof(ib_wc_t)); + if(!p_wc){ + status = IB_INSUFFICIENT_MEMORY ; + goto err_wc_alloc; + } + //Copy part of the attributes need to fill the mad extended fields in mellanox devices + p_wc->recv.ud.remote_lid = p_av_attr->dlid; + p_wc->recv.ud.remote_sl = p_av_attr->sl; + p_wc->recv.ud.path_bits = p_av_attr->path_bits; + p_wc->recv.ud.recv_opt = p_av_attr->grh_valid ? 
IB_RECV_OPT_GRH_VALID : 0; + + if(p_wc->recv.ud.recv_opt & IB_RECV_OPT_GRH_VALID){ + p_grh = cl_zalloc(sizeof(struct _ib_grh)); + if(!p_grh){ + status = IB_INSUFFICIENT_MEMORY ; + goto err_grh_alloc; + } + p_grh->version_tclass_flow = p_av_attr->grh.ver_class_flow; + p_grh->hop_limit = p_av_attr->grh.hop_limit; + cl_memcpy( &p_grh->sgid, &p_av_attr->grh.src_gid, sizeof(p_grh->sgid) ); + cl_memcpy( &p_grh->dgid, &p_av_attr->grh.dest_gid, sizeof(p_grh->dgid) ); + // TODO: no direct analogue in IBAL (seems like it is from rmpp) + p_grh->paylen = 0; + p_grh->next_hdr = 0; + } + + + } + + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_MAD, + ("MAD: Class %02x, Method %02x, Attr %02x, HopPtr %d, HopCnt %d, \n", + (uint32_t)((ib_smp_t *)p_mad_in)->mgmt_class, + (uint32_t)((ib_smp_t *)p_mad_in)->method, + (uint32_t)((ib_smp_t *)p_mad_in)->attr_id, + (uint32_t)((ib_smp_t *)p_mad_in)->hop_ptr, + (uint32_t)((ib_smp_t *)p_mad_in)->hop_count)); + + // process mad + err = p_ibdev->process_mad( p_ibdev, mad_flags, (uint8_t)port_num, + p_wc, p_grh, (struct ib_mad*)p_mad_in, (struct ib_mad*)p_mad_out); + if (!err) { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_MAD, + ("MAD failed:\n\tClass 0x%x\n\tMethod 0x%x\n\tAttr 0x%x", + p_mad_in->mgmt_class, p_mad_in->method, p_mad_in->attr_id )); + status = IB_ERROR; + goto err_process_mad; + } + + if( (p_mad_in->mgmt_class == IB_MCLASS_SUBN_DIR || + p_mad_in->mgmt_class == IB_MCLASS_SUBN_LID) && + p_mad_in->attr_id == IB_MAD_ATTR_PORT_INFO ) + { + ib_port_info_t *p_pi_in, *p_pi_out; + + if( p_mad_in->mgmt_class == IB_MCLASS_SUBN_DIR ) + { + p_pi_in = (ib_port_info_t*) + ib_smp_get_payload_ptr( (ib_smp_t*)p_mad_in ); + p_pi_out = (ib_port_info_t*) + ib_smp_get_payload_ptr( (ib_smp_t*)p_mad_out ); + } + else + { + p_pi_in = (ib_port_info_t*)(p_mad_in + 1); + p_pi_out = (ib_port_info_t*)(p_mad_out + 1); + } + + /* Work around FW bug 33958 */ + p_pi_out->subnet_timeout &= 0x7F; + if( p_mad_in->method == IB_MAD_METHOD_SET ) + p_pi_out->subnet_timeout |= (p_pi_in->subnet_timeout & 0x80); + } + + /* Modify direction for Direct MAD */ + if ( p_mad_in->mgmt_class == IB_MCLASS_SUBN_DIR ) + p_mad_out->status |= IB_SMP_DIRECTION; + + +err_process_mad: + if(p_grh) + cl_free(p_grh); +err_grh_alloc: + if(p_wc) + cl_free(p_wc); +err_wc_alloc: +err_port_num: + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_MAD, + ("completes with ERROR status %x\n", status)); + } + HCA_EXIT(HCA_DBG_MAD); + return status; +} + + +void +setup_ci_interface( + IN const ib_net64_t ca_guid, + IN const int is_livefish, + IN OUT ci_interface_t *p_interface ) +{ + cl_memclr(p_interface, sizeof(*p_interface)); + + /* Guid of the CA. */ + p_interface->guid = ca_guid; + + /* Version of this interface. */ + p_interface->version = VERBS_VERSION; + + /* UVP name */ + cl_memcpy( p_interface->libname, mlnx_uvp_lib_name, MAX_LIB_NAME); + + HCA_PRINT(TRACE_LEVEL_VERBOSE , HCA_DBG_SHIM ,("UVP filename %s\n", p_interface->libname)); + + /* The real interface. 
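+	   Populate the CI dispatch table with the mlnx_* entry points below; in livefish mode only the CA and MR interfaces are installed.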
*/ + mlnx_pd_if(p_interface); + p_interface->um_open_ca = mlnx_um_open; + p_interface->um_close_ca = mlnx_um_close; + p_interface->vendor_call = fw_access_ctrl; + + if (is_livefish) { + mlnx_ca_if_livefish(p_interface); + mlnx_mr_if_livefish(p_interface); + } + else { + mlnx_ca_if(p_interface); + mlnx_av_if(p_interface); + mlnx_srq_if(p_interface); + mlnx_qp_if(p_interface); + mlnx_cq_if(p_interface); + mlnx_mr_if(p_interface); + mlnx_direct_if(p_interface); + mlnx_mcast_if(p_interface); + p_interface->local_mad = mlnx_local_mad; + } + + return; +} + + diff --git a/branches/ConnectX/hw/mlx4/kernel/hca/wmi.c b/branches/ConnectX/hw/mlx4/kernel/hca/wmi.c new file mode 100644 index 00000000..fe554d45 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/hca/wmi.c @@ -0,0 +1,261 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: al.c 1611 2006-08-20 14:48:55Z sleybo $ + */ + +#include "precomp.h" + +#ifdef USE_WDM_FRAMEWORK + +#pragma warning( disable : 4206) + +#else + +#if defined(EVENT_TRACING) +#include "wmi.tmh" +#endif + +#ifdef ALLOC_PRAGMA +#pragma alloc_text(PAGE,WmiRegistration) +#pragma alloc_text(PAGE,EvtStdDataSetItem) +#pragma alloc_text(PAGE,EvtStdDataSetInstance) +#pragma alloc_text(PAGE,EvtStdDataQueryInstance) +#endif + +NTSTATUS +WmiRegistration( + WDFDEVICE Device + ) +/*++ +Routine Description + + Registers with WMI as a data provider for this + instance of the device + +--*/ +{ + WDF_WMI_PROVIDER_CONFIG providerConfig; + WDF_WMI_INSTANCE_CONFIG instanceConfig; + PFDO_DEVICE_DATA deviceData; + NTSTATUS status; + DECLARE_CONST_UNICODE_STRING(hcaRsrcName, HCARESOURCENAME); + + PAGED_CODE(); + + deviceData = FdoGetData(Device); + + // + // Register WMI classes. + // First specify the resource name which contain the binary mof resource. + // + status = WdfDeviceAssignMofResourceName(Device, &hcaRsrcName); + if (!NT_SUCCESS(status)) { + return status; + } + + WDF_WMI_PROVIDER_CONFIG_INIT(&providerConfig, &MLX4_HCA_WMI_STD_DATA_GUID); + providerConfig.MinInstanceBufferSize = sizeof(HCA_WMI_STD_DATA); + + // + // You would want to create a WDFWMIPROVIDER handle separately if you are + // going to dynamically create instances on the provider. Since we are + // statically creating one instance, there is no need to create the provider + // handle. 
+ // + WDF_WMI_INSTANCE_CONFIG_INIT_PROVIDER_CONFIG(&instanceConfig, &providerConfig); + + // + // By setting Register to TRUE, we tell the framework to create a provider + // as part of the Instance creation call. This eliminates the need to + // call WdfWmiProviderRegister. + // + instanceConfig.Register = TRUE; + instanceConfig.EvtWmiInstanceQueryInstance = EvtStdDataQueryInstance; + instanceConfig.EvtWmiInstanceSetInstance = EvtStdDataSetInstance; + instanceConfig.EvtWmiInstanceSetItem = EvtStdDataSetItem; + + status = WdfWmiInstanceCreate( Device, + &instanceConfig, WDF_NO_OBJECT_ATTRIBUTES, WDF_NO_HANDLE ); + + return status; +} + +// +// WMI System Call back functions +// +NTSTATUS +EvtStdDataSetItem( + IN WDFWMIINSTANCE WmiInstance, + IN ULONG DataItemId, + IN ULONG InBufferSize, + IN PVOID InBuffer + ) +/*++ + +Routine Description: + + This routine is a callback into the driver to set for the contents of + an instance. + +Arguments: + + WmiInstance is the instance being set + + DataItemId has the id of the data item being set + + InBufferSize has the size of the data item passed + + InBuffer has the new values for the data item + +Return Value: + + status + +--*/ +{ + PFDO_DEVICE_DATA fdoData; + + PAGED_CODE(); + + fdoData = FdoGetData(WdfWmiInstanceGetDevice(WmiInstance)); + + switch(DataItemId) + { + case 1: + if (InBufferSize < sizeof(ULONG)) { + return STATUS_BUFFER_TOO_SMALL; + } + fdoData->WmiData.DebugPrintLevel = *((PULONG)InBuffer); + return STATUS_SUCCESS; + + case 2: + if (InBufferSize < sizeof(ULONG)) { + return STATUS_BUFFER_TOO_SMALL; + } + fdoData->WmiData.DebugPrintFlags = *((PULONG)InBuffer); + return STATUS_SUCCESS; + + default: + return STATUS_WMI_READ_ONLY; + } +} + +NTSTATUS +EvtStdDataSetInstance( + IN WDFWMIINSTANCE WmiInstance, + IN ULONG InBufferSize, + IN PVOID InBuffer + ) +/*++ + +Routine Description: + + This routine is a callback into the driver to set for the contents of + an instance. + +Arguments: + + WmiInstance is the instance being set + + BufferSize has the size of the data block passed + + Buffer has the new values for the data block + +Return Value: + + status + +--*/ +{ + PFDO_DEVICE_DATA fdoData; + + UNREFERENCED_PARAMETER(InBufferSize); + + PAGED_CODE(); + + fdoData = FdoGetData(WdfWmiInstanceGetDevice(WmiInstance)); + + // + // We will update only writable elements. + // + memcpy( &fdoData->WmiData, InBuffer, + min(sizeof(HCA_WMI_STD_DATA), InBufferSize)); + + return STATUS_SUCCESS; +} + +NTSTATUS +EvtStdDataQueryInstance( + IN WDFWMIINSTANCE WmiInstance, + IN ULONG OutBufferSize, + IN PVOID OutBuffer, + OUT PULONG BufferUsed + ) +/*++ + +Routine Description: + + This routine is a callback into the driver to set for the contents of + a wmi instance + +Arguments: + + WmiInstance is the instance being set + + OutBufferSize on has the maximum size available to write the data + block. 
+ + OutBuffer on return is filled with the returned data block + + BufferUsed pointer containing how many bytes are required (upon failure) or + how many bytes were used (upon success) + +Return Value: + + status + +--*/ +{ + PFDO_DEVICE_DATA fdoData; + + UNREFERENCED_PARAMETER(OutBufferSize); + + PAGED_CODE(); + + fdoData = FdoGetData(WdfWmiInstanceGetDevice(WmiInstance)); + + *BufferUsed = sizeof (HCA_WMI_STD_DATA); + * (PHCA_WMI_STD_DATA) OutBuffer = fdoData->WmiData; + + return STATUS_SUCCESS; +} + +#endif + diff --git a/branches/ConnectX/hw/mlx4/kernel/ib/Kconfig b/branches/ConnectX/hw/mlx4/kernel/ib/Kconfig new file mode 100644 index 00000000..4175a4bd --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/ib/Kconfig @@ -0,0 +1,8 @@ +config MLX4_INFINIBAND + tristate "Mellanox ConnectX HCA support" + select MLX4_CORE + ---help--- + This driver provides low-level InfiniBand support for + Mellanox ConnectX PCI Express host channel adapters (HCAs). + This is required to use InfiniBand protocols such as + IP-over-IB or SRP with these devices. diff --git a/branches/ConnectX/hw/mlx4/kernel/ib/Makefile.lnx b/branches/ConnectX/hw/mlx4/kernel/ib/Makefile.lnx new file mode 100644 index 00000000..70f09c78 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/ib/Makefile.lnx @@ -0,0 +1,3 @@ +obj-$(CONFIG_MLX4_INFINIBAND) += mlx4_ib.o + +mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o diff --git a/branches/ConnectX/hw/mlx4/kernel/ib/SOURCES b/branches/ConnectX/hw/mlx4/kernel/ib/SOURCES new file mode 100644 index 00000000..62c25fd9 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/ib/SOURCES @@ -0,0 +1,45 @@ +TARGETNAME=mlx4_ib +TARGETPATH=..\..\..\..\bin\kernel\obj$(BUILD_ALT_DIR) +TARGETTYPE=DRIVER_LIBRARY + + + +!if $(FREEBUILD) +#ENABLE_EVENT_TRACING=1 +!else +#ENABLE_EVENT_TRACING=1 +!endif + + +DLLDEF=ib.def + +SOURCES= ib.rc \ + ah.c \ + cq.c \ + doorbell.c \ + mad.c \ + main.c \ + mr.c \ + qp.c \ + srq.c \ + +INCLUDES=..;..\inc;..\..\inc;..\..\..\..\inc;..\..\..\..\inc\kernel; + +C_DEFINES=$(C_DEFINES) -DDRIVER -DDEPRECATE_DDK_FUNCTIONS -D__LITTLE_ENDIAN + +TARGETLIBS= \ + $(DDK_LIB_PATH)\ntstrsafe.lib \ + $(TARGETPATH)\*\complib.lib \ + $(TARGETPATH)\*\mlx4_core.lib \ + +!IFDEF ENABLE_EVENT_TRACING + +C_DEFINES = $(C_DEFINES) -DEVENT_TRACING + +RUN_WPP = $(SOURCES) -km -ext: .c .h .C .H \ + -scan:..\mlx4_debug.h \ + -func:MLX4_PRINT(LEVEL,FLAGS,(MSG,...)) \ + -func:MLX4_PRINT_EXIT(LEVEL,FLAGS,(MSG,...)) +!ENDIF + +MSC_WARNING_LEVEL= /W4 diff --git a/branches/ConnectX/hw/mlx4/kernel/ib/ah.c b/branches/ConnectX/hw/mlx4/kernel/ib/ah.c new file mode 100644 index 00000000..d281e629 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/ib/ah.c @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "mlx4_ib.h" + +struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +{ + struct mlx4_dev *dev = to_mdev(pd->device)->dev; + struct mlx4_ib_ah *ah; + + ah = kmalloc(sizeof *ah, GFP_ATOMIC); + if (!ah) + return ERR_PTR(-ENOMEM); + + memset(&ah->av, 0, sizeof ah->av); + + ah->av.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); + ah->av.g_slid = ah_attr->src_path_bits; + ah->av.dlid = cpu_to_be16(ah_attr->dlid); + if (ah_attr->static_rate) { + ah->av.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET; + while (ah->av.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET && + !(1 << ah->av.stat_rate & dev->caps.stat_rate_support)) + --ah->av.stat_rate; + } + ah->av.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); + if (ah_attr->ah_flags & IB_AH_GRH) { + ah->av.g_slid |= 0x80; + ah->av.gid_index = ah_attr->grh.sgid_index; + ah->av.hop_limit = ah_attr->grh.hop_limit; + ah->av.sl_tclass_flowlabel |= + cpu_to_be32((ah_attr->grh.traffic_class << 20) | + ah_attr->grh.flow_label); + memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, 16); + } + + return &ah->ibah; +} + +int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) +{ + struct mlx4_ib_ah *ah = to_mah(ibah); + + memset(ah_attr, 0, sizeof *ah_attr); + ah_attr->dlid = be16_to_cpu(ah->av.dlid); + ah_attr->sl = (u8)(be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28); + ah_attr->port_num = (u8)(be32_to_cpu(ah->av.port_pd) >> 24); + if (ah->av.stat_rate) + ah_attr->static_rate = ah->av.stat_rate - MLX4_STAT_RATE_OFFSET; + ah_attr->src_path_bits = ah->av.g_slid & 0x7F; + + if (mlx4_ib_ah_grh_present(ah)) { + ah_attr->ah_flags = IB_AH_GRH; + + ah_attr->grh.traffic_class = + (u8)(be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20); + ah_attr->grh.flow_label = + be32_to_cpu(ah->av.sl_tclass_flowlabel) & 0xfffff; + ah_attr->grh.hop_limit = ah->av.hop_limit; + ah_attr->grh.sgid_index = ah->av.gid_index; + memcpy(ah_attr->grh.dgid.raw, ah->av.dgid, 16); + } + + return 0; +} + +int mlx4_ib_destroy_ah(struct ib_ah *ah) +{ + kfree(to_mah(ah)); + return 0; +} + +// Leo: temporary +int mlx4_ib_modify_ah( struct ib_ah *ibah, struct ib_ah_attr *ah_attr ) +{ + struct mlx4_av *av = &to_mah(ibah)->av; + struct mlx4_dev *dev = to_mdev(ibah->pd->device)->dev; + + // taken from mthca_create_av + av->port_pd = cpu_to_be32(to_mpd(ibah->pd)->pdn | (ah_attr->port_num << 24)); + av->g_slid = ah_attr->src_path_bits; + av->dlid = cpu_to_be16(ah_attr->dlid); + if (ah_attr->static_rate) { + av->stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET; + while (av->stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET && + !(1 << av->stat_rate & dev->caps.stat_rate_support)) + --av->stat_rate; + } + av->sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); + if (ah_attr->ah_flags & IB_AH_GRH) { + 
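+		/* as in mlx4_ib_create_ah above, the top bit of g_slid flags that a GRH is present */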
av->g_slid |= 0x80; + av->gid_index = ah_attr->grh.sgid_index; + av->hop_limit = ah_attr->grh.hop_limit; + av->sl_tclass_flowlabel |= + cpu_to_be32((ah_attr->grh.traffic_class << 20) | + ah_attr->grh.flow_label); + memcpy(av->dgid, ah_attr->grh.dgid.raw, 16); + } + return 0; +} + diff --git a/branches/ConnectX/hw/mlx4/kernel/ib/cq.c b/branches/ConnectX/hw/mlx4/kernel/ib/cq.c new file mode 100644 index 00000000..c1090eec --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/ib/cq.c @@ -0,0 +1,577 @@ +/* + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "mlx4_ib.h" +#include "cq.h" +#include "qp.h" +#include "user.h" + +static void mlx4_ib_cq_comp(struct mlx4_cq *cq) +{ + struct ib_cq *ibcq = &to_mibcq(cq)->ibcq; + ibcq->comp_handler(ibcq, ibcq->cq_context); +} + +static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type) +{ + struct ib_event event; + struct ib_cq *ibcq; + + if (type != MLX4_EVENT_TYPE_CQ_ERROR) { + printk(KERN_WARNING "mlx4_ib: Unexpected event type %d " + "on CQ %06x\n", type, cq->cqn); + return; + } + + ibcq = &to_mibcq(cq)->ibcq; + if (ibcq->event_handler) { + event.device = ibcq->device; + event.event = IB_EVENT_CQ_ERR; + event.element.cq = ibcq; + ibcq->event_handler(&event, ibcq->cq_context); + } +} + +static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n) +{ + int offset = n * sizeof (struct mlx4_cqe); + + if (buf->buf.nbufs == 1) + return buf->buf.u.direct.buf + offset; + else + return buf->buf.u.page_list[offset >> PAGE_SHIFT].buf + + (offset & (PAGE_SIZE - 1)); +} + +static void *get_cqe(struct mlx4_ib_cq *cq, int n) +{ + return get_cqe_from_buf(&cq->buf, n); +} + +static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n) +{ + struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe); + + return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^ + !!(n & (cq->ibcq.cqe + 1))) ? 
NULL : cqe; +} + +static struct mlx4_cqe *next_cqe_sw(struct mlx4_ib_cq *cq) +{ + return get_sw_cqe(cq, cq->mcq.cons_index); +} + +int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) +{ + struct mlx4_ib_cq *mcq = to_mcq(cq); + struct mlx4_ib_dev *dev = to_mdev(cq->device); + struct mlx4_cq_context *context; + int err; + + context = kzalloc(sizeof *context, GFP_KERNEL); + if (!context) + return -ENOMEM; + + context->cq_period = cpu_to_be16(cq_period); + context->cq_max_count = cpu_to_be16(cq_count); + err = mlx4_cq_modify(dev->dev, &mcq->mcq, context, 1); + + kfree(context); + return err; +} + +struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct mlx4_ib_dev *dev = to_mdev(ibdev); + struct mlx4_ib_cq *cq; + struct mlx4_uar *uar; + int buf_size; + int err; + + UNUSED_PARAM(vector); + + if (entries < 1 || entries > dev->dev->caps.max_cqes) + return ERR_PTR(-EINVAL); + + cq = kmalloc(sizeof *cq, GFP_KERNEL); + if (!cq) + return ERR_PTR(-ENOMEM); + + entries = roundup_pow_of_two(entries + 1); + cq->ibcq.cqe = entries - 1; + buf_size = entries * sizeof (struct mlx4_cqe); + spin_lock_init(&cq->lock); + + if (context) { + struct mlx4_ib_create_cq ucmd; + + if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { + err = -EFAULT; + goto err_cq; + } + + cq->umem = ib_umem_get(context, ucmd.buf_addr, buf_size, + IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(cq->umem)) { + err = PTR_ERR(cq->umem); + goto err_cq; + } + + err = mlx4_mtt_init(dev->dev, ib_umem_page_count(cq->umem), + ilog2(cq->umem->page_size), &cq->buf.mtt); + if (err) + goto err_buf; + + err = mlx4_ib_umem_write_mtt(dev, &cq->buf.mtt, cq->umem); + if (err) + goto err_mtt; + + err = mlx4_ib_db_map_user(to_mucontext(context), ucmd.db_addr, + &cq->db); + if (err) + goto err_mtt; + + // add mapping to user's arm_sn variable + // we have no way pass the completion event to provider library + // so we'll increment user's arm_sn in kernel + err = ib_umem_map( ucmd.arm_sn_addr, sizeof(int), + IB_ACCESS_LOCAL_WRITE, &cq->mcq.mdl, &cq->mcq.p_u_arm_sn ); + if (err) + goto err_dbmap; + + uar = &to_mucontext(context)->uar; + } else { + err = mlx4_ib_db_alloc(dev, &cq->db, 1); + if (err) + goto err_cq; + + cq->mcq.set_ci_db = cq->db.db; + cq->mcq.arm_db = cq->db.db + 1; + *cq->mcq.set_ci_db = 0; + *cq->mcq.arm_db = 0; + + if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &cq->buf.buf)) { + err = -ENOMEM; + goto err_db; + } + + err = mlx4_mtt_init(dev->dev, cq->buf.buf.npages, cq->buf.buf.page_shift, + &cq->buf.mtt); + if (err) + goto err_buf; + + err = mlx4_buf_write_mtt(dev->dev, &cq->buf.mtt, &cq->buf.buf); + if (err) + goto err_mtt; + + cq->mcq.p_u_arm_sn = NULL; + uar = &dev->priv_uar; + } + + err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar, + cq->db.dma.da, &cq->mcq); + if (err) + goto err_dbmap; + + cq->mcq.comp = mlx4_ib_cq_comp; + cq->mcq.event = mlx4_ib_cq_event; + + if (context) + if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof (__u32))) { + err = -EFAULT; + goto err_dbmap; + } + + return &cq->ibcq; + +err_dbmap: + ib_umem_unmap( cq->mcq.mdl, cq->mcq.p_u_arm_sn ); + if (context) + mlx4_ib_db_unmap_user(to_mucontext(context), &cq->db); + +err_mtt: + mlx4_mtt_cleanup(dev->dev, &cq->buf.mtt); + +err_buf: + if (context) + ib_umem_release(cq->umem); + else + mlx4_buf_free(dev->dev, entries * sizeof (struct mlx4_cqe), + &cq->buf.buf); + +err_db: + if (!context) + mlx4_ib_db_free(dev, &cq->db); + +err_cq: + kfree(cq); + + return 
ERR_PTR(err); +} + +int mlx4_ib_destroy_cq(struct ib_cq *cq) +{ + struct mlx4_ib_dev *dev = to_mdev(cq->device); + struct mlx4_ib_cq *mcq = to_mcq(cq); + + mlx4_cq_free(dev->dev, &mcq->mcq); + mlx4_mtt_cleanup(dev->dev, &mcq->buf.mtt); + + if (cq->p_uctx) { + ib_umem_unmap( mcq->mcq.mdl, mcq->mcq.p_u_arm_sn ); + mlx4_ib_db_unmap_user(to_mucontext(cq->p_uctx), &mcq->db); + ib_umem_release(mcq->umem); + } else { + mlx4_buf_free(dev->dev, (cq->cqe + 1) * sizeof (struct mlx4_cqe), + &mcq->buf.buf); + mlx4_ib_db_free(dev, &mcq->db); + } + + kfree(mcq); + + return 0; +} + +static void dump_cqe(void *cqe) +{ + __be32 *buf = cqe; + + printk(KERN_DEBUG "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n", + be32_to_cpu(buf[0]), be32_to_cpu(buf[1]), be32_to_cpu(buf[2]), + be32_to_cpu(buf[3]), be32_to_cpu(buf[4]), be32_to_cpu(buf[5]), + be32_to_cpu(buf[6]), be32_to_cpu(buf[7])); +} + +static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe, + ib_wc_t *wc) +{ + if (cqe->syndrome == MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR) { + printk(KERN_DEBUG "local QP operation err " + "(QPN %06x, WQE index %x, vendor syndrome %02x, " + "opcode = %02x)\n", + be32_to_cpu(cqe->my_qpn), be16_to_cpu(cqe->wqe_index), + cqe->vendor_err_syndrome, + cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK); + dump_cqe(cqe); + } + + switch (cqe->syndrome) { + case MLX4_CQE_SYNDROME_LOCAL_LENGTH_ERR: + wc->status = IB_WCS_LOCAL_LEN_ERR; + break; + case MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR: + wc->status = IB_WCS_LOCAL_OP_ERR; + break; + case MLX4_CQE_SYNDROME_LOCAL_PROT_ERR: + wc->status = IB_WCS_LOCAL_PROTECTION_ERR; + break; + case MLX4_CQE_SYNDROME_WR_FLUSH_ERR: + wc->status = IB_WCS_WR_FLUSHED_ERR; + break; + case MLX4_CQE_SYNDROME_MW_BIND_ERR: + wc->status = IB_WCS_MEM_WINDOW_BIND_ERR; + break; + case MLX4_CQE_SYNDROME_BAD_RESP_ERR: + wc->status = IB_WCS_BAD_RESP_ERR; + break; + case MLX4_CQE_SYNDROME_LOCAL_ACCESS_ERR: + wc->status = IB_WCS_LOCAL_ACCESS_ERR; + break; + case MLX4_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR: + wc->status = IB_WCS_REM_INVALID_REQ_ERR; + break; + case MLX4_CQE_SYNDROME_REMOTE_ACCESS_ERR: + wc->status = IB_WCS_REM_ACCESS_ERR; + break; + case MLX4_CQE_SYNDROME_REMOTE_OP_ERR: + wc->status = IB_WCS_REM_OP_ERR; + break; + case MLX4_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR: + wc->status = IB_WCS_TIMEOUT_RETRY_ERR; + break; + case MLX4_CQE_SYNDROME_RNR_RETRY_EXC_ERR: + wc->status = IB_WCS_RNR_RETRY_ERR; + break; + case MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR: + wc->status = IB_WCS_REM_ABORT_ERR; + break; + default: + wc->status = IB_WC_GENERAL_ERR; + break; + } + + wc->vendor_specific = cqe->vendor_err_syndrome; +} + +static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, + struct mlx4_ib_qp **cur_qp, + ib_wc_t *wc) +{ + struct mlx4_cqe *cqe; + struct mlx4_qp *mqp; + struct mlx4_ib_wq *wq; + struct mlx4_ib_srq *srq; + int is_send; + int is_error; + u16 wqe_ctr; + + cqe = next_cqe_sw(cq); + if (!cqe) + return -EAGAIN; + + ++cq->mcq.cons_index; + + /* + * Make sure we read CQ entry contents after we've checked the + * ownership bit. + */ + rmb(); + + is_send = cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK; + is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == + MLX4_CQE_OPCODE_ERROR; + + if (!*cur_qp || + (be32_to_cpu(cqe->my_qpn) & 0xffffff) != (u32)(*cur_qp)->mqp.qpn) { + /* + * We do not have to take the QP table lock here, + * because CQs will be locked while QPs are removed + * from the table. 
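+	 * The QP found by the lookup below is cached in *cur_qp, so consecutive CQEs for the same QP skip the lookup.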
+ */ + mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev, + be32_to_cpu(cqe->my_qpn)); + if (unlikely(!mqp)) { + printk(KERN_WARNING "CQ %06x with entry for unknown QPN %06x\n", + cq->mcq.cqn, be32_to_cpu(cqe->my_qpn) & 0xffffff); + return -EINVAL; + } + + *cur_qp = to_mibqp(mqp); + } + + wc->opaque= &(*cur_qp)->ibqp; + + if (is_send) { + wq = &(*cur_qp)->sq; + wqe_ctr = be16_to_cpu(cqe->wqe_index); + wq->tail += (u16) (wqe_ctr - (u16) wq->tail); + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + ++wq->tail; + } else if ((*cur_qp)->ibqp.srq) { + srq = to_msrq((*cur_qp)->ibqp.srq); + wqe_ctr = be16_to_cpu(cqe->wqe_index); + wc->wr_id = srq->wrid[wqe_ctr]; + mlx4_ib_free_srq_wqe(srq, wqe_ctr); + } else { + wq = &(*cur_qp)->rq; + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + ++wq->tail; + } + + if (is_send) { + wc->recv.ud.recv_opt = 0; + switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) { + case MLX4_OPCODE_RDMA_WRITE_IMM: + wc->recv.ud.recv_opt |= IB_RECV_OPT_IMMEDIATE; + case MLX4_OPCODE_RDMA_WRITE: + wc->wc_type = IB_WC_RDMA_WRITE; + break; + case MLX4_OPCODE_SEND_IMM: + wc->recv.ud.recv_opt |= IB_RECV_OPT_IMMEDIATE; + case MLX4_OPCODE_SEND: + wc->wc_type = IB_WC_SEND; + break; + case MLX4_OPCODE_RDMA_READ: + wc->wc_type = IB_WC_RDMA_READ; + wc->length = be32_to_cpu(cqe->byte_cnt); + break; + case MLX4_OPCODE_ATOMIC_CS: + wc->wc_type = IB_WC_COMPARE_SWAP; + wc->length = 8; + break; + case MLX4_OPCODE_ATOMIC_FA: + wc->wc_type = IB_WC_FETCH_ADD; + wc->length = 8; + break; + case MLX4_OPCODE_BIND_MW: + wc->wc_type = IB_WC_MW_BIND; + break; + } + } else { + wc->length = be32_to_cpu(cqe->byte_cnt); + + switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) { + case MLX4_RECV_OPCODE_RDMA_WRITE_IMM: + wc->wc_type = IB_WC_RECV_RDMA_WRITE; + wc->recv.ud.recv_opt = IB_RECV_OPT_IMMEDIATE; + wc->recv.ud.immediate_data = cqe->immed_rss_invalid; + break; + case MLX4_RECV_OPCODE_SEND: + wc->wc_type = IB_WC_RECV; + wc->recv.ud.recv_opt = 0; + break; + case MLX4_RECV_OPCODE_SEND_IMM: + wc->wc_type = IB_WC_RECV; + wc->recv.ud.recv_opt = IB_RECV_OPT_IMMEDIATE; + wc->recv.ud.immediate_data = cqe->immed_rss_invalid; + break; + } + + wc->recv.ud.remote_lid = cqe->rlid; + wc->recv.ud.remote_sl = cqe->sl >> 4; + wc->recv.ud.remote_qp = cqe->g_mlpath_rqpn & 0xffffff00; + wc->recv.ud.path_bits = (u8)(cqe->g_mlpath_rqpn & 0x7f); + wc->recv.ud.recv_opt |= cqe->g_mlpath_rqpn & 0x080 ? IB_RECV_OPT_GRH_VALID : 0; + wc->recv.ud.pkey_index = (u16)(be32_to_cpu(cqe->immed_rss_invalid) & 0x7f); + } + if (!is_send && cqe->rlid == 0){ + MLX4_PRINT(TRACE_LEVEL_INFORMATION,MLX4_DBG_CQ,("found rlid == 0 \n ")); + wc->recv.ud.recv_opt |= IB_RECV_OPT_FORWARD; + } + + if (unlikely(is_error)) + mlx4_ib_handle_error_cqe((struct mlx4_err_cqe *) cqe, wc); + else + wc->status = IB_WCS_SUCCESS; + + return 0; +} + +int mlx4_ib_poll_cq( + IN struct ib_cq *ibcq, + IN OUT ib_wc_t** const pp_free_wclist, + OUT ib_wc_t** const pp_done_wclist ) +{ + struct mlx4_ib_cq *cq = to_mcq(ibcq); + struct mlx4_ib_qp *cur_qp = NULL; + unsigned long flags; + int err = 0; + int npolled = 0; + ib_wc_t *wc_p, **next_pp; + + spin_lock_irqsave(&cq->lock, &flags); + + // loop through CQ + next_pp = pp_done_wclist; + wc_p = *pp_free_wclist; + while( wc_p ) { + // poll one CQE + err = mlx4_ib_poll_one(cq, &cur_qp, wc_p); + if (err) + break; + + // prepare for the next loop + *next_pp = wc_p; + next_pp = &wc_p->p_next; + wc_p = wc_p->p_next; + ++npolled; + } + + // prepare the results + *pp_free_wclist = wc_p; /* Set the head of the free list. 
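+	   Entries not consumed in this poll remain chained on the caller's free list; completed entries have been moved to the done list.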
*/ + *next_pp = NULL; /* Clear the tail of the done list. */ + + // update consumer index + if (npolled) + mlx4_cq_set_ci(&cq->mcq); + + spin_unlock_irqrestore(&cq->lock, flags); + return (err == 0 || err == -EAGAIN)? npolled : err; +} + +int mlx4_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) +{ + mlx4_cq_arm(&to_mcq(ibcq)->mcq, + (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? + MLX4_CQ_DB_REQ_NOT_SOL : MLX4_CQ_DB_REQ_NOT, + to_mdev(ibcq->device)->uar_map, + MLX4_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->uar_lock)); + + return 0; +} + +void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq) +{ + u32 prod_index; + int nfreed = 0; + struct mlx4_cqe *cqe, *dest; + u8 owner_bit; + + /* + * First we need to find the current producer index, so we + * know where to start cleaning from. It doesn't matter if HW + * adds new entries after this loop -- the QP we're worried + * about is already in RESET, so the new entries won't come + * from our QP and therefore don't need to be checked. + */ + for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); ++prod_index) + if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe) + break; + + /* + * Now sweep backwards through the CQ, removing CQ entries + * that match our QP by copying older entries on top of them. + */ + while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) { + cqe = get_cqe(cq, prod_index & cq->ibcq.cqe); + if ((be32_to_cpu(cqe->my_qpn) & 0xffffff) == qpn) { + if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK)) + mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index)); + ++nfreed; + } else if (nfreed) { + dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe); + owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK; + memcpy(dest, cqe, sizeof *cqe); + dest->owner_sr_opcode = owner_bit | + (dest->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK); + } + } + + if (nfreed) { + cq->mcq.cons_index += nfreed; + /* + * Make sure update of buffer contents is done before + * updating consumer index. + */ + wmb(); + mlx4_cq_set_ci(&cq->mcq); + } +} + +void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq) +{ + spin_lock_irq(&cq->lock); + __mlx4_ib_cq_clean(cq, qpn, srq); + spin_unlock_irq(&cq->lock); +} diff --git a/branches/ConnectX/hw/mlx4/kernel/ib/doorbell.c b/branches/ConnectX/hw/mlx4/kernel/ib/doorbell.c new file mode 100644 index 00000000..23c6574c --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/ib/doorbell.c @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "l2w.h" +#include "mlx4_ib.h" + +struct mlx4_ib_db_pgdir { + struct list_head list; + DECLARE_BITMAP(order0, MLX4_IB_DB_PER_PAGE); + DECLARE_BITMAP(order1, MLX4_IB_DB_PER_PAGE / 2); + unsigned long *bits[2]; + __be32 *db_page; + dma_addr_t db_dma; +}; + +static struct mlx4_ib_db_pgdir *mlx4_ib_alloc_db_pgdir(struct mlx4_ib_dev *dev) +{ + struct mlx4_ib_db_pgdir *pgdir; + + pgdir = kzalloc(sizeof *pgdir, GFP_KERNEL); + if (!pgdir) + return NULL; + + bitmap_fill(pgdir->order1, MLX4_IB_DB_PER_PAGE / 2); + pgdir->bits[0] = pgdir->order0; + pgdir->bits[1] = pgdir->order1; + pgdir->db_page = dma_alloc_coherent(&dev->ib_dev.dma_device, + PAGE_SIZE, &pgdir->db_dma, + GFP_KERNEL); + if (!pgdir->db_page) { + kfree(pgdir); + return NULL; + } + + return pgdir; +} + +static int mlx4_ib_alloc_db_from_pgdir(struct mlx4_ib_db_pgdir *pgdir, + struct mlx4_ib_db *db, int order) +{ + int o; + int i; + + for (o = order; o <= 1; ++o) { + i = find_first_bit(pgdir->bits[o], MLX4_IB_DB_PER_PAGE >> o); + if (i < MLX4_IB_DB_PER_PAGE >> o) + goto found; + } + + return -ENOMEM; + +found: + clear_bit(i, pgdir->bits[o]); + + i <<= o; + + if (o > order) + set_bit(i ^ 1, pgdir->bits[order]); + + db->u.pgdir = pgdir; + db->index = i; + db->db = pgdir->db_page + db->index; + db->dma.da = pgdir->db_dma.da + db->index * 4; + db->order = order; + + return 0; +} + +int mlx4_ib_db_alloc(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db, int order) +{ + struct mlx4_ib_db_pgdir *pgdir; + int ret = 0; + + mutex_lock(&dev->pgdir_mutex); + + list_for_each_entry(pgdir, &dev->pgdir_list, list, struct mlx4_ib_db_pgdir) + if (!mlx4_ib_alloc_db_from_pgdir(pgdir, db, order)) + goto out; + + pgdir = mlx4_ib_alloc_db_pgdir(dev); + if (!pgdir) { + ret = -ENOMEM; + goto out; + } + + list_add(&pgdir->list, &dev->pgdir_list); + + /* This should never fail -- we just allocated an empty page: */ + WARN_ON(mlx4_ib_alloc_db_from_pgdir(pgdir, db, order)); + +out: + + mutex_unlock(&dev->pgdir_mutex); + + return ret; +} + +void mlx4_ib_db_free(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db) +{ + int o; + int i; + + mutex_lock(&dev->pgdir_mutex); + + o = db->order; + i = db->index; + + CL_ASSERT(db->u.pgdir); + + if (db->order == 0 && test_bit(i ^ 1, db->u.pgdir->order0)) { + clear_bit(i ^ 1, db->u.pgdir->order0); + ++o; + } + + i >>= o; + set_bit(i, db->u.pgdir->bits[o]); + + if (bitmap_full(db->u.pgdir->order1, MLX4_IB_DB_PER_PAGE / 2)) { + dma_free_coherent(&dev->ib_dev.dma_device, PAGE_SIZE, + db->u.pgdir->db_page, db->u.pgdir->db_dma); + list_del(&db->u.pgdir->list); + kfree(db->u.pgdir); + } + + mutex_unlock(&dev->pgdir_mutex); +} + +struct mlx4_ib_user_db_page { + struct list_head list; + struct ib_umem *umem; + u64 user_virt; + int refcnt; +}; + +int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, u64 virt, + struct mlx4_ib_db *db) +{ + struct mlx4_ib_user_db_page *page; + int err = 0; + + mutex_lock(&context->db_page_mutex); + + list_for_each_entry(page, &context->db_page_list, list, struct mlx4_ib_user_db_page) + if (page->user_virt == 
(virt & (u64)PAGE_MASK)) + goto found; + + page = kmalloc(sizeof *page, GFP_KERNEL); + if (!page) { + err = -ENOMEM; + goto out; + } + + page->user_virt = virt & (u64)PAGE_MASK; + page->refcnt = 0; + page->umem = ib_umem_get(&context->ibucontext, virt & (u64)PAGE_MASK, + PAGE_SIZE, 0); + if (IS_ERR(page->umem)) { + err = PTR_ERR(page->umem); + kfree(page); + goto out; + } + + list_add(&page->list, &context->db_page_list); + +found: + db->dma = ib_umem_get_dma(page->umem); + db->dma.da += (virt & (u64)(~PAGE_MASK)); + db->u.user_page = page; + ++page->refcnt; + +out: + mutex_unlock(&context->db_page_mutex); + + return err; +} + +void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_ib_db *db) +{ + mutex_lock(&context->db_page_mutex); + + if (!--db->u.user_page->refcnt) { + list_del(&db->u.user_page->list); + ib_umem_release(db->u.user_page->umem); + kfree(db->u.user_page); + } + + mutex_unlock(&context->db_page_mutex); +} diff --git a/branches/ConnectX/hw/mlx4/kernel/ib/ib.def b/branches/ConnectX/hw/mlx4/kernel/ib/ib.def new file mode 100644 index 00000000..a78e9d07 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/ib/ib.def @@ -0,0 +1,11 @@ +LIBRARY mlx4_ib.lib + +EXPORTS +; DllInitialize and DllUnload must be exported for the OS reference counting to +; work, and must be private for the compiler to accept them. +DllInitialize private +DllUnload private + +; main.c +mlx4_ib_init +mlx4_ib_cleanup \ No newline at end of file diff --git a/branches/ConnectX/hw/mlx4/kernel/ib/ib.rc b/branches/ConnectX/hw/mlx4/kernel/ib/ib.rc new file mode 100644 index 00000000..db7b710d --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/ib/ib.rc @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: ibal.rc 1611 2006-08-20 14:48:55Z sleybo $ + */ + + +#include + +#define VER_FILETYPE VFT_DRV +#define VER_FILESUBTYPE VFT2_UNKNOWN + +#ifdef _DEBUG_ +#define VER_FILEDESCRIPTION_STR "MLX4 InfiniBand Specific Services (Debug)" +#else +#define VER_FILEDESCRIPTION_STR "MLX4 InfiniBand Specific Services" +#endif + +#define VER_INTERNALNAME_STR "mlx4_ib.lib" +#define VER_ORIGINALFILENAME_STR "mlx4_ib.lib" + +#include diff --git a/branches/ConnectX/hw/mlx4/kernel/ib/mad.c b/branches/ConnectX/hw/mlx4/kernel/ib/mad.c new file mode 100644 index 00000000..68f686b2 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/ib/mad.c @@ -0,0 +1,262 @@ +/* + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "mlx4_ib.h" +#include +#include +#include "cmd.h" + +enum { + MLX4_IB_VENDOR_CLASS1 = 0x9, + MLX4_IB_VENDOR_CLASS2 = 0xa +}; + +int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey, + int port, ib_wc_t *in_wc, struct ib_grh *in_grh, + void *in_mad, void *response_mad) +{ + struct mlx4_cmd_mailbox *inmailbox, *outmailbox; + u8 *inbox; + int err; + u32 in_modifier = port; + u8 op_modifier = 0; + + inmailbox = mlx4_alloc_cmd_mailbox(dev->dev); + if (IS_ERR(inmailbox)) + return PTR_ERR(inmailbox); + inbox = inmailbox->buf; + + outmailbox = mlx4_alloc_cmd_mailbox(dev->dev); + if (IS_ERR(outmailbox)) { + mlx4_free_cmd_mailbox(dev->dev, inmailbox); + return PTR_ERR(outmailbox); + } + + memcpy(inbox, in_mad, 256); + + /* + * Key check traps can't be generated unless we have in_wc to + * tell us where to send the trap. 
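+	 * Without a work completion the M_Key/B_Key checks are skipped by setting the ignore bits of op_modifier below.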
+ */ + if (ignore_mkey || !in_wc) + op_modifier |= 0x1; + if (ignore_bkey || !in_wc) + op_modifier |= 0x2; + + if (in_wc) { + struct { + __be32 my_qpn; + u32 reserved1; + __be32 rqpn; + u8 sl; + u8 g_path; + u16 reserved2[2]; + __be16 pkey; + u32 reserved3[11]; + u8 grh[40]; + } *ext_info; + + memset(inbox + 256, 0, 256); + ext_info = (void*)(inbox + 256); + + if ( in_wc->opaque ) + ext_info->my_qpn = cpu_to_be32( ((struct ib_qp*)in_wc->opaque)->qp_num ); + ext_info->rqpn = in_wc->recv.ud.remote_qp; + ext_info->sl = in_wc->recv.ud.remote_sl << 4; + ext_info->g_path = in_wc->recv.ud.path_bits | + (in_wc->recv.ud.recv_opt & IB_RECV_OPT_GRH_VALID ? 0x80 : 0); + ext_info->pkey = cpu_to_be16(in_wc->recv.ud.pkey_index); + + if (in_grh) + memcpy(ext_info->grh, in_grh, 40); + + op_modifier |= 0x4; + + in_modifier |= be16_to_cpu(in_wc->recv.ud.remote_lid) << 16; + } + + err = mlx4_cmd_box(dev->dev, inmailbox->dma.da, outmailbox->dma.da, + in_modifier, op_modifier, + MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C); + + if (!err) + memcpy(response_mad, outmailbox->buf, 256); + +// mlx4_dbg( dev->dev, "[MLX4_BUS] mlx4_MAD_IFC : port %d, err %d \n", port, err ); + + mlx4_free_cmd_mailbox(dev->dev, inmailbox); + mlx4_free_cmd_mailbox(dev->dev, outmailbox); + + return err; +} + +static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl) +{ + struct ib_ah *new_ah; + struct ib_ah_attr ah_attr; + + if (!dev->send_agent[port_num - 1][0]) + return; + + memset(&ah_attr, 0, sizeof ah_attr); + ah_attr.dlid = lid; + ah_attr.sl = sl; + ah_attr.port_num = port_num; + + new_ah = ib_create_ah(dev->send_agent[port_num - 1][0]->qp->pd, + &ah_attr); + if (IS_ERR(new_ah)) + return; + + spin_lock(&dev->sm_lock); + if (dev->sm_ah[port_num - 1]) + ib_destroy_ah(dev->sm_ah[port_num - 1]); + dev->sm_ah[port_num - 1] = new_ah; + spin_unlock(&dev->sm_lock); +} + +/* + * Snoop SM MADs for port info and P_Key table sets, so we can + * synthesize LID change and P_Key change events. 
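+ * A PortInfo Set carrying the client-reregister bit is reported as IB_EVENT_CLIENT_REREGISTER rather than a LID change.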
+ */ +static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad) +{ + struct ib_event event; + + if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && + mad->mad_hdr.method == IB_MGMT_METHOD_SET) { + if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) { + struct ib_port_info *pinfo = + (struct ib_port_info *) ((struct ib_smp *) mad)->data; + + update_sm_ah(to_mdev(ibdev), port_num, + be16_to_cpu(pinfo->sm_lid), + pinfo->neighbormtu_mastersmsl & 0xf); + + event.device = ibdev; + event.element.port_num = port_num; + + if(pinfo->clientrereg_resv_subnetto & 0x80) + event.event = IB_EVENT_CLIENT_REREGISTER; + else + event.event = IB_EVENT_LID_CHANGE; + + ib_dispatch_event(&event); + } + + if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) { + event.device = ibdev; + event.event = IB_EVENT_PKEY_CHANGE; + event.element.port_num = port_num; + ib_dispatch_event(&event); + } + } +} + +static void node_desc_override(struct ib_device *dev, + struct ib_mad *mad) +{ + if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && + mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP && + mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) { + spin_lock(&to_mdev(dev)->sm_lock); + memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64); + spin_unlock(&to_mdev(dev)->sm_lock); + } +} + +int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + ib_wc_t *in_wc, struct ib_grh *in_grh, + struct ib_mad *in_mad, struct ib_mad *out_mad) +{ + u16 slid; + int err; + + slid = in_wc ? be16_to_cpu(in_wc->recv.ud.remote_lid) : be16_to_cpu(XIB_LID_PERMISSIVE); + + if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) { + // we never comes here ! + ASSERT(0); + MLX4_PRINT( TRACE_LEVEL_ERROR ,MLX4_DBG_MAD , + (" Received a trap from HCA, which is unexpected here !\n" )); + // forward_trap(to_mdev(ibdev), port_num, in_mad); + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; + } + + if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && + in_mad->mad_hdr.method != IB_MGMT_METHOD_SET && + in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS) + return IB_MAD_RESULT_SUCCESS; + + /* + * Don't process SMInfo queries or vendor-specific + * MADs -- the SMA can't handle them. 
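+		 * Returning success without the REPLY bit means no response MAD is generated for them.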
+ */ + if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO || + ((in_mad->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) == + IB_SMP_ATTR_VENDOR_MASK)) + return IB_MAD_RESULT_SUCCESS; + } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || + in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS1 || + in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS2) { + if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && + in_mad->mad_hdr.method != IB_MGMT_METHOD_SET) + return IB_MAD_RESULT_SUCCESS; + } else + return IB_MAD_RESULT_SUCCESS; + + err = mlx4_MAD_IFC(to_mdev(ibdev), + mad_flags & IB_MAD_IGNORE_MKEY, + mad_flags & IB_MAD_IGNORE_BKEY, + port_num, in_wc, in_grh, in_mad, out_mad); + if (err) + return IB_MAD_RESULT_FAILURE; + + if (!out_mad->mad_hdr.status) { + smp_snoop(ibdev, port_num, in_mad); + node_desc_override(ibdev, out_mad); + } + + /* set return bit in status of directed route responses */ + if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + out_mad->mad_hdr.status |= cpu_to_be16(1 << 15); + + if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) + /* no response for trap repress */ + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; + + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; +} diff --git a/branches/ConnectX/hw/mlx4/kernel/ib/main.c b/branches/ConnectX/hw/mlx4/kernel/ib/main.c new file mode 100644 index 00000000..538cb57a --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/ib/main.c @@ -0,0 +1,656 @@ +/* + * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "mlx4_ib.h" +#include "ib_smi.h" +#include "driver.h" +#include "cmd.h" +#include "user.h" +#include "ib_cache.h" + +#define DRV_NAME "mlx4_ib" + +static void init_query_mad(struct ib_smp *mad) +{ + mad->base_version = 1; + mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + mad->class_version = 1; + mad->method = IB_MGMT_METHOD_GET; +} + +static int mlx4_ib_query_device(struct ib_device *ibdev, + struct ib_device_attr *props) +{ + struct mlx4_ib_dev *dev = to_mdev(ibdev); + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; + + err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + memset(props, 0, sizeof *props); + + props->fw_ver = dev->dev->caps.fw_ver; + props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT | + IB_DEVICE_PORT_ACTIVE_EVENT | + IB_DEVICE_SYS_IMAGE_GUID | + IB_DEVICE_RC_RNR_NAK_GEN; + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR) + props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR) + props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR; + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_APM) + props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT) + props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE; + + props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & + 0xffffff; + props->vendor_part_id = be16_to_cpup((__be16 *) (out_mad->data + 30)); + props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32)); + memcpy(&props->sys_image_guid, out_mad->data + 4, 8); + + props->max_mr_size = ~0ull; + props->page_size_cap = dev->dev->caps.page_size_cap; + props->max_qp = dev->dev->caps.num_qps - dev->dev->caps.reserved_qps; + props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE; + + props->max_sge = min(dev->dev->caps.max_sq_sg, + dev->dev->caps.max_rq_sg); + props->max_cq = dev->dev->caps.num_cqs - dev->dev->caps.reserved_cqs; + props->max_cqe = dev->dev->caps.max_cqes; + props->max_mr = dev->dev->caps.num_mpts - dev->dev->caps.reserved_mrws; + props->max_pd = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds; + props->max_qp_rd_atom = dev->dev->caps.max_qp_dest_rdma; + props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma; + props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; + props->max_srq = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs; + props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1; + props->max_srq_sge = dev->dev->caps.max_srq_sge; + props->local_ca_ack_delay = (u8)dev->dev->caps.local_ca_ack_delay; + props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ? 
+ IB_ATOMIC_HCA : IB_ATOMIC_NON; + props->max_pkeys = (u16)dev->dev->caps.pkey_table_len[1]; + props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms; + props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm; + props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * + props->max_mcast_grp; + props->max_map_per_fmr = (1 << (32 - ilog2(dev->dev->caps.num_mpts))) - 1; + +out: + kfree(in_mad); + kfree(out_mad); + + return err; +} + +static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + memset(props, 0, sizeof *props); + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); + + err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16)); + props->lmc = out_mad->data[34] & 0x7; + props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18)); + props->sm_sl = out_mad->data[36] & 0xf; + props->state = out_mad->data[32] & 0xf; + props->phys_state = out_mad->data[33] >> 4; + props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20)); + props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port]; + props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz; + props->pkey_tbl_len = (u16)to_mdev(ibdev)->dev->caps.pkey_table_len[port]; + props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46)); + props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48)); + props->active_width = out_mad->data[31] & 0xf; + props->active_speed = out_mad->data[35] >> 4; + props->max_mtu = out_mad->data[41] & 0xf; + props->active_mtu = out_mad->data[36] >> 4; + props->subnet_timeout = out_mad->data[51] & 0x1f; + props->max_vl_num = out_mad->data[37] >> 4; + props->init_type_reply = out_mad->data[41] >> 4; + +out: + kfree(in_mad); + kfree(out_mad); + + return err; +} + +static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); + + err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + memcpy(gid->raw, out_mad->data + 8, 8); + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_GUID_INFO; + in_mad->attr_mod = cpu_to_be32(index / 8); + + err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8); + +out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE; + in_mad->attr_mod = cpu_to_be32(index / 32); + + err = 
mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]); + +out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask, + struct ib_device_modify *props) +{ + if (mask & ~IB_DEVICE_MODIFY_NODE_DESC) + return -EOPNOTSUPP; + + if (mask & IB_DEVICE_MODIFY_NODE_DESC) { + spin_lock(&to_mdev(ibdev)->sm_lock); + memcpy(ibdev->node_desc, props->node_desc, 64); + spin_unlock(&to_mdev(ibdev)->sm_lock); + } + + return 0; +} + +static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols, + u32 cap_mask) +{ + struct mlx4_cmd_mailbox *mailbox; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev->dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + memset(mailbox->buf, 0, 256); + + if (dev->dev->flags & MLX4_FLAG_OLD_PORT_CMDS) { + *(u8 *) mailbox->buf = (u8)(!!reset_qkey_viols << 6); + ((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask); + } else { + ((u8 *) mailbox->buf)[3] = (u8)!!reset_qkey_viols; + ((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask); + } + + err = mlx4_cmd(dev->dev, mailbox->dma.da, port, 0, MLX4_CMD_SET_PORT, + MLX4_CMD_TIME_CLASS_B); + + mlx4_free_cmd_mailbox(dev->dev, mailbox); + return err; +} + +static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask, + struct ib_port_modify *props) +{ + struct ib_port_attr attr; + u32 cap_mask; + int err; + + mutex_lock(&to_mdev(ibdev)->cap_mask_mutex); + + err = mlx4_ib_query_port(ibdev, port, &attr); + if (err) + goto out; + + cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) & + ~props->clr_port_cap_mask; + + err = mlx4_SET_PORT(to_mdev(ibdev), port, + !!(mask & IB_PORT_RESET_QKEY_CNTR), + cap_mask); + +out: + mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex); + return err; +} + +static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata) +{ + struct mlx4_ib_dev *dev = to_mdev(ibdev); + struct mlx4_ib_ucontext *context; + struct mlx4_ib_alloc_ucontext_resp resp; + int err; + + resp.qp_tab_size = dev->dev->caps.num_qps; + resp.bf_reg_size = (__u16)dev->dev->caps.bf_reg_size; + resp.bf_regs_per_page = (__u16)dev->dev->caps.bf_regs_per_page; + + context = kzalloc(sizeof *context, GFP_KERNEL); + if (!context) + return ERR_PTR(-ENOMEM); + + if (mlx4_is_livefish(to_mdev(ibdev)->dev)) + goto done; + + err = mlx4_uar_alloc(to_mdev(ibdev)->dev, &context->uar); + if (err) { + kfree(context); + return ERR_PTR(err); + } + + INIT_LIST_HEAD(&context->db_page_list); + mutex_init(&context->db_page_mutex); + +done: + err = ib_copy_to_udata(udata, &resp, sizeof resp); + if (err) { + mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar); + kfree(context); + return ERR_PTR(-EFAULT); + } + + return &context->ibucontext; +} + +static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) +{ + struct mlx4_ib_ucontext *context = to_mucontext(ibcontext); + + if (!mlx4_is_livefish(to_mdev(ibcontext->device)->dev)) + mlx4_uar_free(to_mdev(ibcontext->device)->dev, &context->uar); + kfree(context); + + return 0; +} + +#if 0 + // TODO: not clear, what is the usage +static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + struct mlx4_ib_dev *dev = to_mdev(context->device); + + if (vma->vm_end - vma->vm_start != PAGE_SIZE) + return -EINVAL; + + if (vma->vm_pgoff == 0) { + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + if (io_remap_pfn_range(vma, vma->vm_start, + 
to_mucontext(context)->uar.pfn, + PAGE_SIZE, vma->vm_page_prot)) + return -EAGAIN; + } else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) { + /* FIXME want pgprot_writecombine() for BlueFlame pages */ + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + if (io_remap_pfn_range(vma, vma->vm_start, + to_mucontext(context)->uar.pfn + + dev->dev->caps.num_uars, + PAGE_SIZE, vma->vm_page_prot)) + return -EAGAIN; + } else + return -EINVAL; + + return 0; +} +#endif + +static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct mlx4_ib_pd *pd; + int err; + + pd = kmalloc(sizeof *pd, GFP_KERNEL); + if (!pd) + return ERR_PTR(-ENOMEM); + + if (mlx4_is_livefish(to_mdev(ibdev)->dev)) + goto done; + + err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn); + if (err) { + kfree(pd); + return ERR_PTR(err); + } + + if (context) + if (ib_copy_to_udata(udata, &pd->pdn, sizeof (__u32))) { + mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn); + kfree(pd); + return ERR_PTR(-EFAULT); + } + +done: + return &pd->ibpd; +} + +static int mlx4_ib_dealloc_pd(struct ib_pd *pd) +{ + if (!mlx4_is_livefish(to_mdev(pd->device)->dev)) + mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn); + kfree(pd); + + return 0; +} + +static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + UNUSED_PARAM(lid); + return mlx4_multicast_attach(to_mdev(ibqp->device)->dev, + &to_mqp(ibqp)->mqp, gid->raw); +} + +static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + UNUSED_PARAM(lid); + return mlx4_multicast_detach(to_mdev(ibqp->device)->dev, + &to_mqp(ibqp)->mqp, gid->raw); +} + +static int init_node_data(struct mlx4_ib_dev *dev) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_NODE_DESC; + + err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + dev->dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32)); + memcpy(dev->ib_dev.node_desc, out_mad->data, 64); + + in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; + + err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8); + +out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +static void *mlx4_ib_add(struct mlx4_dev *dev) +{ + struct mlx4_ib_dev *ibdev; + + ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev); + if (!ibdev) { + dev_err(&dev->pdev->dev, "Device struct alloc failed\n"); + return NULL; + } + + MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock); + + INIT_LIST_HEAD(&ibdev->pgdir_list); + mutex_init(&ibdev->pgdir_mutex); + + ibdev->dev = dev; + + strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX); + ibdev->ib_dev.node_type = RDMA_NODE_IB_CA; + ibdev->ib_dev.phys_port_cnt = (u8)dev->caps.num_ports; + ibdev->ib_dev.num_comp_vectors = 1; + ibdev->ib_dev.dma_device = dev->pdev->dev; + + ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION; + ibdev->ib_dev.query_device = mlx4_ib_query_device; + ibdev->ib_dev.query_port = mlx4_ib_query_port; + ibdev->ib_dev.query_gid = mlx4_ib_query_gid; + ibdev->ib_dev.query_pkey = mlx4_ib_query_pkey; + ibdev->ib_dev.modify_device = mlx4_ib_modify_device; + ibdev->ib_dev.modify_port = mlx4_ib_modify_port; + ibdev->ib_dev.alloc_ucontext = 
mlx4_ib_alloc_ucontext; + ibdev->ib_dev.dealloc_ucontext = mlx4_ib_dealloc_ucontext; + ibdev->ib_dev.mmap = NULL; /* mlx4_ib_mmap; */ + ibdev->ib_dev.alloc_pd = mlx4_ib_alloc_pd; + ibdev->ib_dev.dealloc_pd = mlx4_ib_dealloc_pd; + ibdev->ib_dev.create_ah = mlx4_ib_create_ah; + ibdev->ib_dev.query_ah = mlx4_ib_query_ah; + ibdev->ib_dev.modify_ah = mlx4_ib_modify_ah; + ibdev->ib_dev.destroy_ah = mlx4_ib_destroy_ah; + ibdev->ib_dev.create_srq = mlx4_ib_create_srq; + ibdev->ib_dev.modify_srq = mlx4_ib_modify_srq; + ibdev->ib_dev.query_srq = mlx4_ib_query_srq; + ibdev->ib_dev.destroy_srq = mlx4_ib_destroy_srq; + ibdev->ib_dev.post_srq_recv = mlx4_ib_post_srq_recv; + ibdev->ib_dev.create_qp = mlx4_ib_create_qp; + ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp; + ibdev->ib_dev.query_qp = mlx4_ib_query_qp; + ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp; + ibdev->ib_dev.post_send = mlx4_ib_post_send; + ibdev->ib_dev.post_recv = mlx4_ib_post_recv; + ibdev->ib_dev.create_cq = mlx4_ib_create_cq; + ibdev->ib_dev.modify_cq = mlx4_ib_modify_cq; + ibdev->ib_dev.destroy_cq = mlx4_ib_destroy_cq; + ibdev->ib_dev.poll_cq = mlx4_ib_poll_cq; + ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq; + ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr; + ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr; + ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr; + ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach; + ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; + ibdev->ib_dev.process_mad = mlx4_ib_process_mad; + + ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc; + ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr; + ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr; + ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc; + ibdev->ib_dev.x.find_cached_gid = ib_find_cached_gid; + ibdev->ib_dev.x.find_cached_pkey = ib_find_cached_pkey; + ibdev->ib_dev.x.get_cached_gid = ib_get_cached_gid; + ibdev->ib_dev.x.get_cached_pkey = ib_get_cached_pkey; + + if (mlx4_is_livefish(ibdev->dev)) + return ibdev; + + if (mlx4_pd_alloc(dev, &ibdev->priv_pdn)) + goto err_dealloc; + + if (mlx4_uar_alloc(dev, &ibdev->priv_uar)) + goto err_pd; + + ibdev->uar_map = ioremap(ibdev->priv_uar.pfn << PAGE_SHIFT, PAGE_SIZE); + if (!ibdev->uar_map) + goto err_uar; + + if (init_node_data(ibdev)) + goto err_map; + + spin_lock_init(&ibdev->sm_lock); + mutex_init(&ibdev->cap_mask_mutex); + + if (ib_register_device(&ibdev->ib_dev)) + goto err_map; + + mlx4_dbg(ibdev->dev, "MLX4_BUS: IB interface is ADDED ! \n"); + + return ibdev; + +err_map: + iounmap(ibdev->uar_map, PAGE_SIZE); + +err_uar: + mlx4_uar_free(dev, &ibdev->priv_uar); + +err_pd: + mlx4_pd_free(dev, ibdev->priv_pdn); + +err_dealloc: + ib_dealloc_device(&ibdev->ib_dev); + + return NULL; +} + +static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) +{ + struct mlx4_ib_dev *ibdev = ibdev_ptr; + int p; + + if (mlx4_is_livefish(ibdev->dev)) + goto dealloc_dev; + + for (p = 1; p <= dev->caps.num_ports; ++p) + mlx4_CLOSE_PORT(dev, p); + + ib_unregister_device(&ibdev->ib_dev); + iounmap(ibdev->uar_map,PAGE_SIZE); + mlx4_uar_free(dev, &ibdev->priv_uar); + mlx4_pd_free(dev, ibdev->priv_pdn); +dealloc_dev: + ib_dealloc_device(&ibdev->ib_dev); + mlx4_dbg(ibdev->dev, "MLX4_BUS: IB interface is REMOVED ! \n"); +} + +static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, + enum mlx4_dev_event event, int subtype, + int port) +{ + struct ib_event ibev; + + UNUSED_PARAM(dev); + + switch (event) { + case MLX4_EVENT_TYPE_PORT_CHANGE: + ibev.event = subtype == MLX4_PORT_CHANGE_SUBTYPE_ACTIVE ? 
+ IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; + break; + + case MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR: + ibev.event = IB_EVENT_DEVICE_FATAL; + break; + + default: + return; + } + + ibev.device = ibdev_ptr; + ibev.element.port_num = (u8)port; + + ib_dispatch_event(&ibev); +} + +static struct mlx4_interface mlx4_ib_interface = { + mlx4_ib_add, /* add */ + mlx4_ib_remove, /* remove */ + mlx4_ib_event, /* event */ + NULL, NULL /* list */ +}; + +int __init mlx4_ib_init(void) +{ + mlx4_ib_qp_init(); + return mlx4_register_interface(&mlx4_ib_interface); +} + +void __exit mlx4_ib_cleanup(void) +{ + mlx4_unregister_interface(&mlx4_ib_interface); +} + diff --git a/branches/ConnectX/hw/mlx4/kernel/ib/makefile b/branches/ConnectX/hw/mlx4/kernel/ib/makefile new file mode 100644 index 00000000..a0c06273 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/ib/makefile @@ -0,0 +1,7 @@ +# +# DO NOT EDIT THIS FILE!!! Edit .\sources. if you want to add a new source +# file to this component. This file merely indirects to the real make file +# that is shared by all the driver components of the OpenIB Windows project. +# + +!INCLUDE ..\..\..\..\inc\openib.def diff --git a/branches/ConnectX/hw/mlx4/kernel/ib/mlx4_ib.h b/branches/ConnectX/hw/mlx4/kernel/ib/mlx4_ib.h new file mode 100644 index 00000000..a61af779 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/ib/mlx4_ib.h @@ -0,0 +1,322 @@ +/* + * Copyright (c) 2006, 2007 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef MLX4_IB_H +#define MLX4_IB_H + +#include "l2w.h" + +#include "ib_verbs.h" +// TODO: do we need this file +//#include "ib_umem.h" + +#include "device.h" +#include "doorbell.h" + +enum { + MLX4_IB_DB_PER_PAGE = PAGE_SIZE / 4 +}; + +enum { + MLX4_IB_SQ_MIN_WQE_SHIFT = 6 +}; + +#define MLX4_IB_SQ_HEADROOM(shift) ((2048 >> (shift)) + 1) +#define MLX4_IB_SQ_MAX_SPARE (MLX4_IB_SQ_HEADROOM(MLX4_IB_SQ_MIN_WQE_SHIFT)) + +struct mlx4_ib_db_pgdir; +struct mlx4_ib_user_db_page; + +struct mlx4_ib_db { + __be32 *db; + union { + struct mlx4_ib_db_pgdir *pgdir; + struct mlx4_ib_user_db_page *user_page; + } u; + dma_addr_t dma; + int index; + int order; +}; + +struct mlx4_ib_ucontext { + struct ib_ucontext ibucontext; + struct mlx4_uar uar; + struct list_head db_page_list; + struct mutex db_page_mutex; +}; + +struct mlx4_ib_pd { + struct ib_pd ibpd; + u32 pdn; +}; + +struct mlx4_ib_cq_buf { + struct mlx4_buf buf; + struct mlx4_mtt mtt; +}; + +struct mlx4_ib_cq { + struct ib_cq ibcq; + struct mlx4_cq mcq; + struct mlx4_ib_cq_buf buf; + struct mlx4_ib_db db; + spinlock_t lock; + struct ib_umem *umem; +}; + +struct mlx4_ib_mr { + struct ib_mr ibmr; + struct mlx4_mr mmr; + struct ib_umem *umem; +}; + +struct mlx4_ib_fmr { + struct ib_fmr ibfmr; + struct mlx4_fmr mfmr; +}; + +struct mlx4_ib_wq { + u64 *wrid; + spinlock_t lock; + int wqe_cnt; + int max_post; + int max_gs; + int offset; + int wqe_shift; + unsigned head; + unsigned tail; +}; + +struct mlx4_ib_qp { + struct ib_qp ibqp; + struct mlx4_qp mqp; + struct mlx4_buf buf; + + struct mlx4_ib_db db; + struct mlx4_ib_wq rq; + + u32 doorbell_qpn; + __be32 sq_signal_bits; + int sq_spare_wqes; + struct mlx4_ib_wq sq; + + struct ib_umem *umem; + struct mlx4_mtt mtt; + int buf_size; + struct mutex mutex; + u8 port; + u8 alt_port; + u8 atomic_rd_en; + u8 resp_depth; + u8 sq_no_prefetch; + u8 state; +}; + +struct mlx4_ib_srq { + struct ib_srq ibsrq; + struct mlx4_srq msrq; + struct mlx4_buf buf; + struct mlx4_ib_db db; + u64 *wrid; + spinlock_t lock; + int head; + int tail; + u16 wqe_ctr; + struct ib_umem *umem; + struct mlx4_mtt mtt; + struct mutex mutex; +}; + +struct mlx4_ib_ah { + struct ib_ah ibah; + struct mlx4_av av; +}; + +struct mlx4_ib_dev { + struct ib_device ib_dev; + struct mlx4_dev *dev; + void __iomem *uar_map; + + struct list_head pgdir_list; + struct mutex pgdir_mutex; + + struct mlx4_uar priv_uar; + u32 priv_pdn; + MLX4_DECLARE_DOORBELL_LOCK(uar_lock); + + struct ib_mad_agent *send_agent[MLX4_MAX_PORTS][2]; + struct ib_ah *sm_ah[MLX4_MAX_PORTS]; + spinlock_t sm_lock; + + struct mutex cap_mask_mutex; +}; + +static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev) +{ + return container_of(ibdev, struct mlx4_ib_dev, ib_dev); +} + +static inline struct mlx4_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext) +{ + return container_of(ibucontext, struct mlx4_ib_ucontext, ibucontext); +} + +static inline struct mlx4_ib_pd *to_mpd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct mlx4_ib_pd, ibpd); +} + +static inline struct mlx4_ib_cq *to_mcq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct mlx4_ib_cq, ibcq); +} + +static inline struct mlx4_ib_cq *to_mibcq(struct mlx4_cq *mcq) +{ + return container_of(mcq, struct mlx4_ib_cq, mcq); +} + +static inline struct mlx4_ib_mr *to_mmr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct mlx4_ib_mr, ibmr); +} + +static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr) +{ + return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr); +} +static inline struct 
mlx4_ib_qp *to_mqp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct mlx4_ib_qp, ibqp); +} + +static inline struct mlx4_ib_qp *to_mibqp(struct mlx4_qp *mqp) +{ + return container_of(mqp, struct mlx4_ib_qp, mqp); +} + +static inline struct mlx4_ib_srq *to_msrq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct mlx4_ib_srq, ibsrq); +} + +static inline struct mlx4_ib_srq *to_mibsrq(struct mlx4_srq *msrq) +{ + return container_of(msrq, struct mlx4_ib_srq, msrq); +} + +static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah) +{ + return container_of(ibah, struct mlx4_ib_ah, ibah); +} + +int mlx4_ib_db_alloc(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db, int order); +void mlx4_ib_db_free(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db); +int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, u64 virt, + struct mlx4_ib_db *db); +void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_ib_db *db); + +struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc); +int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, + struct ib_umem *umem); +struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int access_flags, + struct ib_udata *udata); +int mlx4_ib_dereg_mr(struct ib_mr *mr); + +int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); +struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector, + struct ib_ucontext *context, + struct ib_udata *udata); +int mlx4_ib_destroy_cq(struct ib_cq *cq); +int mlx4_ib_poll_cq(struct ib_cq *ibcq, ib_wc_t** const pp_free_wclist, + ib_wc_t** const pp_done_wclist ); +int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); +void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); +void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); + +struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); +int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr); +int mlx4_ib_modify_ah( struct ib_ah *ibah, struct ib_ah_attr *ah_attr ); +int mlx4_ib_destroy_ah(struct ib_ah *ah); + +struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata); +int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); +int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); +int mlx4_ib_destroy_srq(struct ib_srq *srq); +void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index); +int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, ib_recv_wr_t *wr, + ib_recv_wr_t **bad_wr); + +struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata); +int mlx4_ib_destroy_qp(struct ib_qp *qp); +int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); +int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr); +int mlx4_ib_post_send(struct ib_qp *ibqp, ib_send_wr_t *wr, + ib_send_wr_t **bad_wr); +int mlx4_ib_post_recv(struct ib_qp *ibqp, ib_recv_wr_t *wr, + ib_recv_wr_t **bad_wr); + +int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey, + int port, ib_wc_t *in_wc, struct ib_grh *in_grh, + void *in_mad, void *response_mad); +int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + ib_wc_t *in_wc, struct ib_grh *in_grh, + 
struct ib_mad *in_mad, struct ib_mad *out_mad); +int mlx4_ib_mad_init(struct mlx4_ib_dev *dev); +void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev); + +struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int mr_access_flags, + struct ib_fmr_attr *fmr_attr); +int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int npages, + u64 iova); +int mlx4_ib_unmap_fmr(struct list_head *fmr_list); +int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr); + +void mlx4_ib_qp_init(); + +int __init mlx4_ib_init(void); +void __exit mlx4_ib_cleanup(void); + +static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah) +{ + return !!(ah->av.g_slid & 0x80); +} + +#endif /* MLX4_IB_H */ diff --git a/branches/ConnectX/hw/mlx4/kernel/ib/mr.c b/branches/ConnectX/hw/mlx4/kernel/ib/mr.c new file mode 100644 index 00000000..fd70ea1a --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/ib/mr.c @@ -0,0 +1,273 @@ +/* + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "mlx4_ib.h" + +static u32 convert_access(int acc) +{ + return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX4_PERM_ATOMIC : 0) | + (acc & IB_ACCESS_REMOTE_WRITE ? MLX4_PERM_REMOTE_WRITE : 0) | + (acc & IB_ACCESS_REMOTE_READ ? MLX4_PERM_REMOTE_READ : 0) | + (acc & IB_ACCESS_LOCAL_WRITE ? 
MLX4_PERM_LOCAL_WRITE : 0) | + MLX4_PERM_LOCAL_READ; +} + +struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc) +{ + struct mlx4_ib_mr *mr; + int err; + + mr = kmalloc(sizeof *mr, GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + err = mlx4_mr_alloc(to_mdev(pd->device)->dev, to_mpd(pd)->pdn, 0, + ~0ull, convert_access(acc), 0, 0, &mr->mmr); + if (err) + goto err_free; + + err = mlx4_mr_enable(to_mdev(pd->device)->dev, &mr->mmr); + if (err) + goto err_mr; + + mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; + mr->umem = NULL; + + return &mr->ibmr; + +err_mr: + mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr); + +err_free: + kfree(mr); + + return ERR_PTR(err); +} + +int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, + struct ib_umem *p_ib_umem) +{ + u64 *pages; + iobuf_iter_t iobuf_iter; + u32 i, n; + int err; + + pages = (u64 *) __get_free_page(GFP_KERNEL); + if (!pages) + return -ENOMEM; + + i = n = err = 0; + + iobuf_iter_init( &p_ib_umem->iobuf, &iobuf_iter ); + for (;;) { + // get up to max_buf_list_size page physical addresses + i = iobuf_get_tpt_seg( &p_ib_umem->iobuf, &iobuf_iter, + PAGE_SIZE / sizeof (u64), pages ); + if (!i) + break; + + // TODO: convert physical adresses to dma one's + + // write 'i' dma addresses + err = mlx4_write_mtt(dev->dev, mtt, n, i, pages); + if (err) + goto out; + n += i; + if (n >= p_ib_umem->iobuf.nr_pages) + break; + } + + CL_ASSERT(n == p_ib_umem->iobuf.nr_pages); + +out: + free_page(pages); + return err; +} + +struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int access_flags, + struct ib_udata *udata) +{ + struct mlx4_ib_dev *dev = to_mdev(pd->device); + struct mlx4_ib_mr *mr; + int shift; + int err; + int n; + + UNUSED_PARAM(udata); + + mr = kmalloc(sizeof *mr, GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + mr->umem = ib_umem_get(pd->p_uctx, start, length, access_flags); + if (IS_ERR(mr->umem)) { + err = PTR_ERR(mr->umem); + goto err_free; + } + + n = ib_umem_page_count(mr->umem); + shift = ilog2(mr->umem->page_size); + + err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length, + convert_access(access_flags), n, shift, &mr->mmr); + if (err) + goto err_umem; + + err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem); + if (err) + goto err_mr; + + err = mlx4_mr_enable(dev->dev, &mr->mmr); + if (err) + goto err_mr; + + mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; + + return &mr->ibmr; + +err_mr: + mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr); + +err_umem: + ib_umem_release(mr->umem); + +err_free: + kfree(mr); + + return ERR_PTR(err); +} + +int mlx4_ib_dereg_mr(struct ib_mr *ibmr) +{ + struct mlx4_ib_mr *mr = to_mmr(ibmr); + + mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr); + if (mr->umem) + ib_umem_release(mr->umem); + kfree(mr); + + return 0; +} + +struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc, + struct ib_fmr_attr *fmr_attr) +{ + struct mlx4_ib_dev *dev = to_mdev(pd->device); + struct mlx4_ib_fmr *fmr; + int err = -ENOMEM; + + fmr = kmalloc(sizeof *fmr, GFP_KERNEL); + if (!fmr) + return ERR_PTR(-ENOMEM); + + err = mlx4_fmr_alloc(dev->dev, to_mpd(pd)->pdn, convert_access(acc), + fmr_attr->max_pages, fmr_attr->max_maps, + fmr_attr->page_shift, &fmr->mfmr); + if (err) + goto err_free; + + err = mlx4_mr_enable(to_mdev(pd->device)->dev, &fmr->mfmr.mr); + if (err) + goto err_mr; + + fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mfmr.mr.key; + + return &fmr->ibfmr; + +err_mr: + mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr); + +err_free: + 
kfree(fmr); + + return ERR_PTR(err); +} + +int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, + int npages, u64 iova) +{ + struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr); + struct mlx4_ib_dev *dev = to_mdev(ifmr->ibfmr.device); + + return mlx4_map_phys_fmr(dev->dev, &ifmr->mfmr, page_list, npages, iova, + &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey); +} + +int mlx4_ib_unmap_fmr(struct list_head *fmr_list) +{ + struct ib_fmr *ibfmr; + int err; + struct mlx4_dev *mdev = NULL; + + list_for_each_entry(ibfmr, fmr_list, list, struct ib_fmr) { + if (mdev && to_mdev(ibfmr->device)->dev != mdev) + return -EINVAL; + mdev = to_mdev(ibfmr->device)->dev; + } + + if (!mdev) + return 0; + + list_for_each_entry(ibfmr, fmr_list, list, struct ib_fmr) { + struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr); + + mlx4_fmr_unmap(mdev, &ifmr->mfmr, &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey); + } + + /* + * Make sure all MPT status updates are visible before issuing + * SYNC_TPT firmware command. + */ + wmb(); + + err = mlx4_SYNC_TPT(mdev); + if (err) + printk(KERN_WARNING "mlx4_ib: SYNC_TPT error %d when " + "unmapping FMRs\n", err); + + return 0; +} + +int mlx4_ib_fmr_dealloc(struct ib_fmr *ibfmr) +{ + struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr); + struct mlx4_ib_dev *dev = to_mdev(ibfmr->device); + int err; + + err = mlx4_fmr_free(dev->dev, &ifmr->mfmr); + + if (!err) + kfree(ifmr); + + return err; +} diff --git a/branches/ConnectX/hw/mlx4/kernel/ib/qp.c b/branches/ConnectX/hw/mlx4/kernel/ib/qp.c new file mode 100644 index 00000000..228f70ea --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/ib/qp.c @@ -0,0 +1,1726 @@ +/* + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "mlx4_ib.h" +#include "ib_cache.h" +#include "ib_pack.h" +#include "qp.h" +#include "user.h" + +enum { + MLX4_IB_ACK_REQ_FREQ = 8, +}; + +enum { + MLX4_IB_DEFAULT_SCHED_QUEUE = 0x83, + MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f +}; + +enum { + /* + * Largest possible UD header: send with GRH and immediate data. 
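
[Editor's note, illustrative only] The 72-byte figure assigned to MLX4_IB_UD_HEADER_SIZE just below corresponds to the largest UD send header the comment describes: LRH + GRH + BTH + DETH plus 4 bytes of immediate data, using the standard InfiniBand header lengths. A minimal standalone sketch of that arithmetic; the IB_*_BYTES names are hypothetical constants introduced here, not part of this patch:

#include <assert.h>

/* Standard IBA wire header sizes, in bytes (illustrative constants) */
#define IB_LRH_BYTES    8   /* Local Route Header                  */
#define IB_GRH_BYTES   40   /* Global Route Header (optional)      */
#define IB_BTH_BYTES   12   /* Base Transport Header               */
#define IB_DETH_BYTES   8   /* Datagram Extended Transport Header  */
#define IB_IMM_BYTES    4   /* Immediate data                      */

int main(void)
{
    /* Largest UD send header: every header present plus immediate data */
    int ud_header_max = IB_LRH_BYTES + IB_GRH_BYTES + IB_BTH_BYTES +
                        IB_DETH_BYTES + IB_IMM_BYTES;

    assert(ud_header_max == 72);    /* matches MLX4_IB_UD_HEADER_SIZE */
    return 0;
}
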
+ */ + MLX4_IB_UD_HEADER_SIZE = 72 +}; + +struct mlx4_ib_sqp { + struct mlx4_ib_qp qp; + int pkey_index; + u32 qkey; + u32 send_psn; + struct ib_ud_header ud_header; + u8 header_buf[MLX4_IB_UD_HEADER_SIZE]; +}; + +enum { + MLX4_IB_MIN_SQ_STRIDE = 6 +}; + +static const __be32 mlx4_ib_opcode[] = { + __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE), /* [IB_WR_RDMA_WRITE] */ + __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM), /* [IB_WR_RDMA_WRITE_WITH_IMM] */ + __constant_cpu_to_be32(MLX4_OPCODE_SEND), /* [IB_WR_SEND] */ + __constant_cpu_to_be32(MLX4_OPCODE_SEND_IMM), /* [IB_WR_SEND_WITH_IMM] */ + __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ), /* [IB_WR_RDMA_READ] */ + __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS), /* [IB_WR_ATOMIC_CMP_AND_SWP] */ + __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA), /* [IB_WR_ATOMIC_FETCH_AND_ADD]*/ +}; + +static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) +{ + return container_of(mqp, struct mlx4_ib_sqp, qp); +} + +static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) +{ + return qp->mqp.qpn >= dev->dev->caps.sqp_start && + qp->mqp.qpn <= dev->dev->caps.sqp_start + 3; +} + +static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) +{ + return qp->mqp.qpn >= dev->dev->caps.sqp_start && + qp->mqp.qpn <= dev->dev->caps.sqp_start + 1; +} + +static void *get_wqe(struct mlx4_ib_qp *qp, int offset) +{ + if (qp->buf.nbufs == 1) + return qp->buf.u.direct.buf + offset; + else + return qp->buf.u.page_list[offset >> PAGE_SHIFT].buf + + (offset & (PAGE_SIZE - 1)); +} + +static void *get_recv_wqe(struct mlx4_ib_qp *qp, int n) +{ + return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift)); +} + +static void *get_send_wqe(struct mlx4_ib_qp *qp, int n) +{ + return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift)); +} + +/* + * Stamp a SQ WQE so that it is invalid if prefetched by marking the + * first four bytes of every 64 byte chunk with 0xffffffff, except for + * the very first chunk of the WQE. + */ +static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n) +{ + u32 *wqe = get_send_wqe(qp, n); + int i; + + for (i = 16; i < 1 << (qp->sq.wqe_shift - 2); i += 16) + wqe[i] = 0xffffffff; +} + +static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type) +{ + struct ib_event event; + struct ib_qp *ibqp = &to_mibqp(qp)->ibqp; + + if (type == MLX4_EVENT_TYPE_PATH_MIG) + to_mibqp(qp)->port = to_mibqp(qp)->alt_port; + + if (ibqp->event_handler) { + event.device = ibqp->device; + event.element.qp = ibqp; + switch (type) { + case MLX4_EVENT_TYPE_PATH_MIG: + event.event = IB_EVENT_PATH_MIG; + break; + case MLX4_EVENT_TYPE_COMM_EST: + event.event = IB_EVENT_COMM_EST; + break; + case MLX4_EVENT_TYPE_SQ_DRAINED: + event.event = IB_EVENT_SQ_DRAINED; + break; + case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE: + event.event = IB_EVENT_QP_LAST_WQE_REACHED; + break; + case MLX4_EVENT_TYPE_WQ_CATAS_ERROR: + event.event = IB_EVENT_QP_FATAL; + break; + case MLX4_EVENT_TYPE_PATH_MIG_FAILED: + event.event = IB_EVENT_PATH_MIG_ERR; + break; + case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + event.event = IB_EVENT_QP_REQ_ERR; + break; + case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR: + event.event = IB_EVENT_QP_ACCESS_ERR; + break; + default: + printk(KERN_WARNING "mlx4_ib: Unexpected event type %d " + "on QP %06x\n", type, qp->qpn); + return; + } + + ibqp->event_handler(&event, ibqp->qp_context); + } +} + +static int send_wqe_overhead(enum ib_qp_type type) +{ + /* + * UD WQEs must have a datagram segment. + * RC and UC WQEs might have a remote address segment. 
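
[Editor's note, illustrative only] Stepping back to stamp_send_wqe() above: it invalidates a not-yet-posted send WQE for the prefetcher by writing 0xffffffff into the first dword of every 64-byte chunk except the first one. A minimal standalone sketch of that loop, under the assumption of a 256-byte WQE (wqe_shift == 8); the buffer and numbers are made up for illustration:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Re-statement of stamp_send_wqe(): mark the first dword of every
 * 64-byte chunk of a send WQE, skipping the very first chunk. */
static void stamp_wqe(uint32_t *wqe, int wqe_shift)
{
    int i;

    /* (1 << wqe_shift) bytes per WQE => (1 << (wqe_shift - 2)) dwords */
    for (i = 16; i < 1 << (wqe_shift - 2); i += 16)
        wqe[i] = 0xffffffff;
}

int main(void)
{
    uint32_t wqe[256 / 4];              /* a 256-byte WQE, wqe_shift == 8 */

    memset(wqe, 0, sizeof(wqe));
    stamp_wqe(wqe, 8);

    /* dwords 16, 32, 48 (byte offsets 64, 128, 192) are now stamped */
    printf("%08x %08x %08x %08x\n", wqe[0], wqe[16], wqe[32], wqe[48]);
    return 0;
}
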
+ * MLX WQEs need two extra inline data segments (for the UD + * header and space for the ICRC). + */ + switch (type) { + case IB_QPT_UD: + return sizeof (struct mlx4_wqe_ctrl_seg) + + sizeof (struct mlx4_wqe_datagram_seg); + case IB_QPT_UC: + return sizeof (struct mlx4_wqe_ctrl_seg) + + sizeof (struct mlx4_wqe_raddr_seg); + case IB_QPT_RC: + return sizeof (struct mlx4_wqe_ctrl_seg) + + sizeof (struct mlx4_wqe_atomic_seg) + + sizeof (struct mlx4_wqe_raddr_seg); + case IB_QPT_SMI: + case IB_QPT_GSI: + return sizeof (struct mlx4_wqe_ctrl_seg) + + ALIGN(MLX4_IB_UD_HEADER_SIZE + + DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE, + MLX4_INLINE_ALIGN) * + sizeof (struct mlx4_wqe_inline_seg), + sizeof (struct mlx4_wqe_data_seg)) + + ALIGN(4 + + sizeof (struct mlx4_wqe_inline_seg), + sizeof (struct mlx4_wqe_data_seg)); + default: + return sizeof (struct mlx4_wqe_ctrl_seg); + } +} + +static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, + int is_user, int has_srq, struct mlx4_ib_qp *qp) +{ + /* Sanity check RQ size before proceeding */ + if ((int)cap->max_recv_wr > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE || + (int)cap->max_recv_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg)) + return -EINVAL; + + if (has_srq) { + /* QPs attached to an SRQ should have no RQ */ + if (cap->max_recv_wr) + return -EINVAL; + + qp->rq.wqe_cnt = qp->rq.max_gs = 0; + } else { + /* HW requires >= 1 RQ entry with >= 1 gather entry */ + if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) + return -EINVAL; + + qp->rq.wqe_cnt = roundup_pow_of_two(max(1U, cap->max_recv_wr)); + qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge)); + qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg)); + } + + /* leave userspace return values as they were, so as not to break ABI */ + if (is_user) { + cap->max_recv_wr = qp->rq.max_post = qp->rq.wqe_cnt; + cap->max_recv_sge = qp->rq.max_gs; + } else { + cap->max_recv_wr = qp->rq.max_post = + min(dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE, qp->rq.wqe_cnt); + cap->max_recv_sge = min(qp->rq.max_gs, + min(dev->dev->caps.max_sq_sg, + dev->dev->caps.max_rq_sg)); + } + /* We don't support inline sends for kernel QPs (yet) */ + + return 0; +} + +static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, + enum ib_qp_type type, struct mlx4_ib_qp *qp) +{ + /* Sanity check SQ size before proceeding */ + if ((int)cap->max_send_wr > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE || + (int)cap->max_send_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg) || + cap->max_inline_data + send_wqe_overhead(type) + + sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz) + return -EINVAL; + + /* + * For MLX transport we need 2 extra S/G entries: + * one for the header and one for the checksum at the end + */ + if ((type == IB_QPT_SMI || type == IB_QPT_GSI) && + (int)cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg) + return -EINVAL; + + qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge * + sizeof (struct mlx4_wqe_data_seg), + cap->max_inline_data + + sizeof (struct mlx4_wqe_inline_seg)) + + send_wqe_overhead(type))); + qp->sq.wqe_shift = max(MLX4_IB_SQ_MIN_WQE_SHIFT, qp->sq.wqe_shift); + qp->sq.max_gs = ((1 << qp->sq.wqe_shift) - send_wqe_overhead(type)) / + sizeof (struct mlx4_wqe_data_seg); + + /* + * We need to leave 2 KB + 1 WQE of headroom in the SQ to + * allow HW to prefetch. 
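
[Editor's note, illustrative only] A standalone sketch of the headroom arithmetic described in the comment above, mirroring MLX4_IB_SQ_HEADROOM() from mlx4_ib.h and the wqe_cnt/max_post computation that follows. The sample numbers are made up, and roundup_pow_of_two here is a local illustrative helper rather than the kernel macro:

#include <stdio.h>

/* Mirrors MLX4_IB_SQ_HEADROOM(): enough spare WQEs to cover 2 KB of
 * hardware prefetch plus one extra entry. */
static unsigned sq_headroom(unsigned wqe_shift)
{
    return (2048u >> wqe_shift) + 1;
}

static unsigned roundup_pow_of_two(unsigned n)  /* illustrative helper */
{
    unsigned p = 1;

    while (p < n)
        p <<= 1;
    return p;
}

int main(void)
{
    unsigned wqe_shift   = 6;       /* 64-byte send WQEs (minimum stride) */
    unsigned max_send_wr = 100;     /* requested by the consumer          */

    unsigned spare   = sq_headroom(wqe_shift);
    unsigned wqe_cnt = roundup_pow_of_two(max_send_wr + spare);

    /* spare = 33, wqe_cnt = 256, usable depth (max_post) = 223 */
    printf("spare=%u wqe_cnt=%u max_post=%u\n",
           spare, wqe_cnt, wqe_cnt - spare);
    return 0;
}
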
+ */ + qp->sq_spare_wqes = MLX4_IB_SQ_HEADROOM(qp->sq.wqe_shift); + qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr + qp->sq_spare_wqes); + + qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + + (qp->sq.wqe_cnt << qp->sq.wqe_shift); + if (qp->rq.wqe_shift > qp->sq.wqe_shift) { + qp->rq.offset = 0; + qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift; + } else { + qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift; + qp->sq.offset = 0; + } + + cap->max_send_wr = qp->sq.max_post = + min(qp->sq.wqe_cnt - qp->sq_spare_wqes, + dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE); + cap->max_send_sge = min(qp->sq.max_gs, + min(dev->dev->caps.max_sq_sg, + dev->dev->caps.max_rq_sg)); + /* We don't support inline sends for kernel QPs (yet) */ + cap->max_inline_data = 0; + + return 0; +} + +static int set_user_sq_size(struct mlx4_ib_dev *dev, + struct mlx4_ib_qp *qp, + struct mlx4_ib_create_qp *ucmd) +{ + /* Sanity check SQ size before proceeding */ + if ((1 << ucmd->log_sq_bb_count) > dev->dev->caps.max_wqes || + ucmd->log_sq_stride > + ilog2(roundup_pow_of_two(dev->dev->caps.max_sq_desc_sz)) || + ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE) + return -EINVAL; + + qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count; + qp->sq.wqe_shift = ucmd->log_sq_stride; + + qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + + (qp->sq.wqe_cnt << qp->sq.wqe_shift); + + return 0; +} + +static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp) +{ + int err; + + mutex_init(&qp->mutex); + spin_lock_init(&qp->sq.lock); + spin_lock_init(&qp->rq.lock); + + qp->state = XIB_QPS_RESET; + qp->atomic_rd_en = 0; + qp->resp_depth = 0; + + qp->rq.head = 0; + qp->rq.tail = 0; + qp->sq.head = 0; + qp->sq.tail = 0; + + err = set_rq_size(dev, &init_attr->cap, !!pd->p_uctx, !!init_attr->srq, qp); + if (err) + goto err; + + if (pd->p_uctx) { + struct mlx4_ib_create_qp ucmd; + + if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { + err = -EFAULT; + goto err; + } + + qp->sq_no_prefetch = ucmd.sq_no_prefetch; + + err = set_user_sq_size(dev, qp, &ucmd); + if (err) + goto err; + + qp->umem = ib_umem_get(pd->p_uctx, ucmd.buf_addr, + qp->buf_size, 0); + if (IS_ERR(qp->umem)) { + err = PTR_ERR(qp->umem); + goto err; + } + + err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem), + ilog2(qp->umem->page_size), &qp->mtt); + if (err) + goto err_buf; + + err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem); + if (err) + goto err_mtt; + + if (!init_attr->srq) { + err = mlx4_ib_db_map_user(to_mucontext(pd->p_uctx), + ucmd.db_addr, &qp->db); + if (err) + goto err_mtt; + } + } else { + qp->sq_no_prefetch = 0; + + err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp); + if (err) + goto err; + + if (!init_attr->srq) { + err = mlx4_ib_db_alloc(dev, &qp->db, 0); + if (err) + goto err; + + *qp->db.db = 0; + } + + if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) { + err = -ENOMEM; + goto err_db; + } + + err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift, + &qp->mtt); + if (err) + goto err_buf; + + err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf); + if (err) + goto err_mtt; + + qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL); + qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL); + + if (!qp->sq.wrid || !qp->rq.wrid) { + err = -ENOMEM; + goto err_wrid; + } + } + + err = mlx4_qp_alloc(dev->dev, sqpn, &qp->mqp); + if (err) + goto err_wrid; + + /* + * Hardware 
wants QPN written in big-endian order (after + * shifting) for send doorbell. Precompute this value to save + * a little bit when posting sends. + */ + qp->doorbell_qpn = swab32(qp->mqp.qpn << 8); + + if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) + qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); + else + qp->sq_signal_bits = 0; + + qp->mqp.event = mlx4_ib_qp_event; + + return 0; + +err_wrid: + if (pd->p_uctx) { + if (!init_attr->srq) + mlx4_ib_db_unmap_user(to_mucontext(pd->p_uctx), + &qp->db); + } else { + kfree(qp->sq.wrid); + kfree(qp->rq.wrid); + } + +err_mtt: + mlx4_mtt_cleanup(dev->dev, &qp->mtt); + +err_buf: + if (pd->p_uctx) + ib_umem_release(qp->umem); + else + mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); + +err_db: + if (!pd->p_uctx && !init_attr->srq) + mlx4_ib_db_free(dev, &qp->db); + +err: + return err; +} + +static enum mlx4_qp_state to_mlx4_state(enum ib_qp_state state) +{ + switch (state) { + case XIB_QPS_RESET: return MLX4_QP_STATE_RST; + case XIB_QPS_INIT: return MLX4_QP_STATE_INIT; + case XIB_QPS_RTR: return MLX4_QP_STATE_RTR; + case XIB_QPS_RTS: return MLX4_QP_STATE_RTS; + case XIB_QPS_SQD: return MLX4_QP_STATE_SQD; + case XIB_QPS_SQE: return MLX4_QP_STATE_SQER; + case XIB_QPS_ERR: return MLX4_QP_STATE_ERR; + default: return -1; + } +} + +static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq) +{ + if (send_cq == recv_cq) + spin_lock_irq(&send_cq->lock); + else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { + spin_lock_irq(&send_cq->lock); + spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING); + } else { + spin_lock_irq(&recv_cq->lock); + spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING); + } +} + +static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq) +{ + if (send_cq == recv_cq) + spin_unlock_irq(&send_cq->lock); + else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { + spin_unlock(&recv_cq->lock); + spin_unlock_irq(&send_cq->lock); + } else { + spin_unlock(&send_cq->lock); + spin_unlock_irq(&recv_cq->lock); + } +} + +static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, + int is_user) +{ + struct mlx4_ib_cq *send_cq, *recv_cq; + + if (qp->state != XIB_QPS_RESET) + if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state), + MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp)) + printk(KERN_WARNING "mlx4_ib: modify QP %06x to RESET failed.\n", + qp->mqp.qpn); + + send_cq = to_mcq(qp->ibqp.send_cq); + recv_cq = to_mcq(qp->ibqp.recv_cq); + + mlx4_ib_lock_cqs(send_cq, recv_cq); + + if (!is_user) { + __mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn, + qp->ibqp.srq ? 
to_msrq(qp->ibqp.srq): NULL); + if (send_cq != recv_cq) + __mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL); + } + + mlx4_qp_remove(dev->dev, &qp->mqp); + + mlx4_ib_unlock_cqs(send_cq, recv_cq); + + mlx4_qp_free(dev->dev, &qp->mqp); + mlx4_mtt_cleanup(dev->dev, &qp->mtt); + + if (is_user) { + if (!qp->ibqp.srq) + mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.p_uctx), + &qp->db); + ib_umem_release(qp->umem); + } else { + kfree(qp->sq.wrid); + kfree(qp->rq.wrid); + mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); + if (!qp->ibqp.srq) + mlx4_ib_db_free(dev, &qp->db); + } +} + +struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + struct mlx4_ib_dev *dev = to_mdev(pd->device); + struct mlx4_ib_sqp *sqp; + struct mlx4_ib_qp *qp; + int err; + + switch (init_attr->qp_type) { + case IB_QPT_RC: + case IB_QPT_UC: + case IB_QPT_UD: + { + qp = kmalloc(sizeof *qp, GFP_KERNEL); + if (!qp) + return ERR_PTR(-ENOMEM); + + err = create_qp_common(dev, pd, init_attr, udata, 0, qp); + if (err) { + kfree(qp); + return ERR_PTR(err); + } + + qp->ibqp.qp_num = qp->mqp.qpn; + + break; + } + case IB_QPT_SMI: + case IB_QPT_GSI: + { + /* Userspace is not allowed to create special QPs: */ + if (pd->p_uctx) + return ERR_PTR(-EINVAL); + + sqp = kmalloc(sizeof *sqp, GFP_KERNEL); + if (!sqp) + return ERR_PTR(-ENOMEM); + + qp = &sqp->qp; + + err = create_qp_common(dev, pd, init_attr, udata, + dev->dev->caps.sqp_start + + (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) + + init_attr->port_num - 1, + qp); + if (err) { + kfree(sqp); + return ERR_PTR(err); + } + + qp->port = init_attr->port_num; + qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1; + + break; + } + default: + /* Don't support raw QPs */ + return ERR_PTR(-EINVAL); + } + + return &qp->ibqp; +} + +int mlx4_ib_destroy_qp(struct ib_qp *qp) +{ + struct mlx4_ib_dev *dev = to_mdev(qp->device); + struct mlx4_ib_qp *mqp = to_mqp(qp); + + if (is_qp0(dev, mqp)) + mlx4_CLOSE_PORT(dev->dev, mqp->port); + + destroy_qp_common(dev, mqp, !!qp->pd->p_uctx); + + if (is_sqp(dev, mqp)) + kfree(to_msqp(mqp)); + else + kfree(mqp); + + return 0; +} + +static int to_mlx4_st(enum ib_qp_type type) +{ + switch (type) { + case IB_QPT_RC: return MLX4_QP_ST_RC; + case IB_QPT_UC: return MLX4_QP_ST_UC; + case IB_QPT_UD: return MLX4_QP_ST_UD; + case IB_QPT_SMI: + case IB_QPT_GSI: return MLX4_QP_ST_MLX; + default: return -1; + } +} + +static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, const struct ib_qp_attr *attr, + int attr_mask) +{ + u8 dest_rd_atomic; + u32 access_flags; + u32 hw_access_flags = 0; + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + dest_rd_atomic = attr->max_dest_rd_atomic; + else + dest_rd_atomic = qp->resp_depth; + + if (attr_mask & IB_QP_ACCESS_FLAGS) + access_flags = attr->qp_access_flags; + else + access_flags = qp->atomic_rd_en; + + if (!dest_rd_atomic) + access_flags &= IB_ACCESS_REMOTE_WRITE; + + if (access_flags & IB_ACCESS_REMOTE_READ) + hw_access_flags |= MLX4_QP_BIT_RRE; + if (access_flags & IB_ACCESS_REMOTE_ATOMIC) + hw_access_flags |= MLX4_QP_BIT_RAE; + if (access_flags & IB_ACCESS_REMOTE_WRITE) + hw_access_flags |= MLX4_QP_BIT_RWE; + + return cpu_to_be32(hw_access_flags); +} + +static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, const struct ib_qp_attr *attr, + int attr_mask) +{ + if (attr_mask & IB_QP_PKEY_INDEX) + sqp->pkey_index = attr->pkey_index; + if (attr_mask & IB_QP_QKEY) + sqp->qkey = attr->qkey; + if (attr_mask & IB_QP_SQ_PSN) + sqp->send_psn = attr->sq_psn; +} + +static void 
mlx4_set_sched(struct mlx4_qp_path *path, u8 port) +{ + path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6); +} + +static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, + struct mlx4_qp_path *path, u8 port) +{ + path->grh_mylmc = ah->src_path_bits & 0x7f; + path->rlid = cpu_to_be16(ah->dlid); + if (ah->static_rate) { + path->static_rate = ah->static_rate + MLX4_STAT_RATE_OFFSET; + while (path->static_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET && + !(1 << path->static_rate & dev->dev->caps.stat_rate_support)) + --path->static_rate; + } else + path->static_rate = 0; + path->counter_index = 0xff; + + if (ah->ah_flags & IB_AH_GRH) { + if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len[port]) { + printk(KERN_ERR "sgid_index (%u) too large. max is %d\n", + ah->grh.sgid_index, dev->dev->caps.gid_table_len[port] - 1); + return -1; + } + + path->grh_mylmc |= 1 << 7; + path->mgid_index = ah->grh.sgid_index; + path->hop_limit = ah->grh.hop_limit; + path->tclass_flowlabel = + cpu_to_be32((ah->grh.traffic_class << 20) | + (ah->grh.flow_label)); + memcpy(path->rgid, ah->grh.dgid.raw, 16); + } + + path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | + ((port - 1) << 6) | ((ah->sl & 0xf) << 2); + + return 0; +} + +static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, + const struct ib_qp_attr *attr, int attr_mask, + enum ib_qp_state cur_state, enum ib_qp_state new_state) +{ + struct mlx4_ib_dev *dev = to_mdev(ibqp->device); + struct mlx4_ib_qp *qp = to_mqp(ibqp); + struct mlx4_qp_context *context; + enum mlx4_qp_optpar optpar = 0; + int sqd_event; + int err = -EINVAL; + + context = kzalloc(sizeof *context, GFP_KERNEL); + if (!context) + return -ENOMEM; + + context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) | + (to_mlx4_st(ibqp->qp_type) << 16)); + context->flags |= cpu_to_be32(1 << 8); /* DE? 
*/ + + if (!(attr_mask & IB_QP_PATH_MIG_STATE)) + context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11); + else { + optpar |= MLX4_QP_OPTPAR_PM_STATE; + switch (attr->path_mig_state) { + case IB_MIG_MIGRATED: + context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11); + break; + case IB_MIG_REARM: + context->flags |= cpu_to_be32(MLX4_QP_PM_REARM << 11); + break; + case IB_MIG_ARMED: + context->flags |= cpu_to_be32(MLX4_QP_PM_ARMED << 11); + break; + } + } + + if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI || + ibqp->qp_type == IB_QPT_UD) + context->mtu_msgmax = (IB_MTU_4096 << 5) | 11; + else if (attr_mask & IB_QP_PATH_MTU) { + if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) { + printk(KERN_ERR "path MTU (%u) is invalid\n", + attr->path_mtu); + goto out; + } + context->mtu_msgmax = (u8)((attr->path_mtu << 5) | + ilog2(dev->dev->caps.max_msg_sz)); + } + + if (qp->rq.wqe_cnt) + context->rq_size_stride = (u8)(ilog2(qp->rq.wqe_cnt) << 3); + context->rq_size_stride |= qp->rq.wqe_shift - 4; + + if (qp->sq.wqe_cnt) + context->sq_size_stride = (u8)(ilog2(qp->sq.wqe_cnt) << 3); + context->sq_size_stride |= qp->sq.wqe_shift - 4; + + if (cur_state == XIB_QPS_RESET && new_state == XIB_QPS_INIT) + context->sq_size_stride |= !!qp->sq_no_prefetch << 7; + + if (qp->ibqp.p_uctx) + context->usr_page = cpu_to_be32(to_mucontext(ibqp->p_uctx)->uar.index); + else + context->usr_page = cpu_to_be32(dev->priv_uar.index); + + if (attr_mask & IB_QP_DEST_QPN) + context->remote_qpn = cpu_to_be32(attr->dest_qp_num); + + if (attr_mask & IB_QP_PORT) { + if (cur_state == XIB_QPS_SQD && new_state == XIB_QPS_SQD && + !(attr_mask & IB_QP_AV)) { + mlx4_set_sched(&context->pri_path, attr->port_num); + optpar |= MLX4_QP_OPTPAR_SCHED_QUEUE; + } + } + + if (attr_mask & IB_QP_PKEY_INDEX) { + context->pri_path.pkey_index = (u8)attr->pkey_index; + optpar |= MLX4_QP_OPTPAR_PKEY_INDEX; + } + + if (attr_mask & IB_QP_AV) { + if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path, + attr_mask & IB_QP_PORT ? 
attr->port_num : qp->port)) + goto out; + + optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH | + MLX4_QP_OPTPAR_SCHED_QUEUE); + } + + if (attr_mask & IB_QP_TIMEOUT) { + context->pri_path.ackto = attr->timeout << 3; + optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT; + } + + if (attr_mask & IB_QP_ALT_PATH) { + if (attr->alt_port_num == 0 || + attr->alt_port_num > dev->dev->caps.num_ports) + goto out; + + if (attr->alt_pkey_index >= + dev->dev->caps.pkey_table_len[attr->alt_port_num]) + goto out; + + if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path, + attr->alt_port_num)) + goto out; + + context->alt_path.pkey_index = (u8)attr->alt_pkey_index; + context->alt_path.ackto = attr->alt_timeout << 3; + optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH; + } + + context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pdn); + context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28); + + if (attr_mask & IB_QP_RNR_RETRY) { + context->params1 |= cpu_to_be32(attr->rnr_retry << 13); + optpar |= MLX4_QP_OPTPAR_RNR_RETRY; + } + + if (attr_mask & IB_QP_RETRY_CNT) { + context->params1 |= cpu_to_be32(attr->retry_cnt << 16); + optpar |= MLX4_QP_OPTPAR_RETRY_COUNT; + } + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { + if (attr->max_rd_atomic) + context->params1 |= + cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21); + optpar |= MLX4_QP_OPTPAR_SRA_MAX; + } + + if (attr_mask & IB_QP_SQ_PSN) + context->next_send_psn = cpu_to_be32(attr->sq_psn); + + context->cqn_send = cpu_to_be32(to_mcq(ibqp->send_cq)->mcq.cqn); + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { + if (attr->max_dest_rd_atomic) + context->params2 |= + cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21); + optpar |= MLX4_QP_OPTPAR_RRA_MAX; + } + + if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) { + context->params2 |= to_mlx4_access_flags(qp, attr, attr_mask); + optpar |= MLX4_QP_OPTPAR_RWE | MLX4_QP_OPTPAR_RRE | MLX4_QP_OPTPAR_RAE; + } + + if (ibqp->srq) + context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC); + + if (attr_mask & IB_QP_MIN_RNR_TIMER) { + context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24); + optpar |= MLX4_QP_OPTPAR_RNR_TIMEOUT; + } + if (attr_mask & IB_QP_RQ_PSN) + context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn); + + context->cqn_recv = cpu_to_be32(to_mcq(ibqp->recv_cq)->mcq.cqn); + + if (attr_mask & IB_QP_QKEY) { + context->qkey = cpu_to_be32(attr->qkey); + optpar |= MLX4_QP_OPTPAR_Q_KEY; + } + + if (ibqp->srq) + context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn); + + if (!ibqp->srq && cur_state == XIB_QPS_RESET && new_state == XIB_QPS_INIT) + context->db_rec_addr = cpu_to_be64(qp->db.dma.da); + + if (cur_state == XIB_QPS_INIT && + new_state == XIB_QPS_RTR && + (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI || + ibqp->qp_type == IB_QPT_UD)) { + context->pri_path.sched_queue = (qp->port - 1) << 6; + if (is_qp0(dev, qp)) + context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE; + else + context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE; + } + + if (cur_state == XIB_QPS_RTS && new_state == XIB_QPS_SQD && + attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify) + sqd_event = 1; + else + sqd_event = 0; + + /* + * Before passing a kernel QP to the HW, make sure that the + * ownership bits of the send queue are set and the SQ + * headroom is stamped so that the hardware doesn't start + * processing stale work requests. 
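
[Editor's note, illustrative only] As an aside on the QP context fields filled in earlier in this function, a standalone sketch of the rq_size_stride/sq_size_stride encoding: log2 of the WQE count goes in the bits above bit 3, and (wqe_shift - 4) in the low three bits. The sample sizes are made up, and ilog2_u32 is a local illustrative helper:

#include <stdint.h>
#include <stdio.h>

static unsigned ilog2_u32(uint32_t n)   /* illustrative helper */
{
    unsigned r = 0;

    while (n >>= 1)
        r++;
    return r;
}

/* Mirrors the size/stride byte built above: log2(wqe_cnt) << 3,
 * OR'ed with the WQE stride expressed as wqe_shift - 4. */
static uint8_t size_stride(uint32_t wqe_cnt, unsigned wqe_shift)
{
    return (uint8_t)((ilog2_u32(wqe_cnt) << 3) | (wqe_shift - 4));
}

int main(void)
{
    /* e.g. a 256-entry RQ with 32-byte (1 << 5) receive WQEs => 0x41 */
    printf("rq_size_stride = 0x%02x\n", size_stride(256, 5));
    return 0;
}
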
+ */ + if (!ibqp->p_uctx && cur_state == XIB_QPS_RESET && new_state == XIB_QPS_INIT) { + struct mlx4_wqe_ctrl_seg *ctrl; + int i; + + for (i = 0; i < qp->sq.wqe_cnt; ++i) { + ctrl = get_send_wqe(qp, i); + ctrl->owner_opcode = cpu_to_be32(1 << 31); + + stamp_send_wqe(qp, i); + } + } + + err = mlx4_qp_modify(dev->dev, &qp->mtt, to_mlx4_state(cur_state), + to_mlx4_state(new_state), context, optpar, + sqd_event, &qp->mqp); + if (err) + goto out; + + qp->state = new_state; + + if (attr_mask & IB_QP_ACCESS_FLAGS) + qp->atomic_rd_en = (u8)attr->qp_access_flags; + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + qp->resp_depth = attr->max_dest_rd_atomic; + if (attr_mask & IB_QP_PORT) + qp->port = attr->port_num; + if (attr_mask & IB_QP_ALT_PATH) + qp->alt_port = attr->alt_port_num; + + if (is_sqp(dev, qp)) + store_sqp_attrs(to_msqp(qp), attr, attr_mask); + + /* + * If we moved QP0 to RTR, bring the IB link up; if we moved + * QP0 to RESET or ERROR, bring the link back down. + */ + if (is_qp0(dev, qp)) { + if (cur_state != XIB_QPS_RTR && new_state == XIB_QPS_RTR) + if (mlx4_INIT_PORT(dev->dev, qp->port)) + printk(KERN_WARNING "INIT_PORT failed for port %d\n", + qp->port); + + if (cur_state != XIB_QPS_RESET && cur_state != XIB_QPS_ERR && + (new_state == XIB_QPS_RESET || new_state == XIB_QPS_ERR)) + mlx4_CLOSE_PORT(dev->dev, qp->port); + } + + /* + * If we moved a kernel QP to RESET, clean up all old CQ + * entries and reinitialize the QP. + */ + if (new_state == XIB_QPS_RESET && !ibqp->p_uctx) { + mlx4_ib_cq_clean(to_mcq(ibqp->recv_cq), qp->mqp.qpn, + ibqp->srq ? to_msrq(ibqp->srq): NULL); + if (ibqp->send_cq != ibqp->recv_cq) + mlx4_ib_cq_clean(to_mcq(ibqp->send_cq), qp->mqp.qpn, NULL); + + qp->rq.head = 0; + qp->rq.tail = 0; + qp->sq.head = 0; + qp->sq.tail = 0; + if (!ibqp->srq) + *qp->db.db = 0; + } + +out: + kfree(context); + return err; +} + +static struct ib_qp_attr mlx4_ib_qp_attr; +static int mlx4_ib_qp_attr_mask_table[IB_QPT_UD + 1]; + +void mlx4_ib_qp_init() +{ + memset( &mlx4_ib_qp_attr, 0, sizeof(mlx4_ib_qp_attr) ); + mlx4_ib_qp_attr.port_num = 1; + + memset( &mlx4_ib_qp_attr_mask_table, 0, sizeof(mlx4_ib_qp_attr_mask_table) ); + mlx4_ib_qp_attr_mask_table[IB_QPT_UD] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_QKEY); + mlx4_ib_qp_attr_mask_table[IB_QPT_UC] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS); + mlx4_ib_qp_attr_mask_table[IB_QPT_RC] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS); + mlx4_ib_qp_attr_mask_table[IB_QPT_SMI] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY); + mlx4_ib_qp_attr_mask_table[IB_QPT_GSI] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY); +} + +int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + struct mlx4_ib_dev *dev = to_mdev(ibqp->device); + struct mlx4_ib_qp *qp = to_mqp(ibqp); + enum ib_qp_state cur_state, new_state; + int err = -EINVAL; + + UNUSED_PARAM(udata); + + mutex_lock(&qp->mutex); + + cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; + new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; + + if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) + goto out; + + if ((attr_mask & IB_QP_PORT) && + (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) { + goto out; + } + + if (attr_mask & IB_QP_PKEY_INDEX) { + int p = attr_mask & IB_QP_PORT ? 
attr->port_num : qp->port; + if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p]) + goto out; + } + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && + attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) { + goto out; + } + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && + attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) { + goto out; + } + + if (cur_state == new_state && cur_state == XIB_QPS_RESET) { + err = 0; + goto out; + } + + if (cur_state == XIB_QPS_RESET && new_state == XIB_QPS_ERR) { + err = __mlx4_ib_modify_qp(ibqp, &mlx4_ib_qp_attr, + mlx4_ib_qp_attr_mask_table[ibqp->qp_type], + XIB_QPS_RESET, XIB_QPS_INIT); + if (err) + goto out; + cur_state = XIB_QPS_INIT; + } + + err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); + +out: + mutex_unlock(&qp->mutex); + return err; +} + +static enum ib_wr_opcode to_wr_opcode(struct _ib_send_wr *wr) +{ + + enum ib_wr_opcode opcode = -1; //= wr->wr_type; + + switch (wr->wr_type) { + case WR_SEND: + opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? IB_WR_SEND_WITH_IMM : IB_WR_SEND; + break; + case WR_RDMA_WRITE: + opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? IB_WR_RDMA_WRITE_WITH_IMM : IB_WR_RDMA_WRITE; + break; + case WR_RDMA_READ: opcode = IB_WR_RDMA_READ; break; + case WR_COMPARE_SWAP: opcode = IB_WR_ATOMIC_CMP_AND_SWP; break; + case WR_FETCH_ADD: opcode = IB_WR_ATOMIC_FETCH_AND_ADD; break; + } + return opcode; +} + +static int build_mlx_header(struct mlx4_ib_sqp *sqp, ib_send_wr_t *wr, + void *wqe) +{ + enum ib_wr_opcode opcode = to_wr_opcode(wr); + struct ib_device *ib_dev = &to_mdev(sqp->qp.ibqp.device)->ib_dev; + struct mlx4_wqe_mlx_seg *mlx = wqe; + struct mlx4_wqe_inline_seg *inl = (void*)((u8*)wqe + sizeof *mlx); + struct mlx4_ib_ah *ah = to_mah((struct ib_ah *)wr->dgrm.ud.h_av); + u16 pkey; + int send_size; + int header_size; + int spc; + u32 i; + + send_size = 0; + for (i = 0; i < wr->num_ds; ++i) + send_size += wr->ds_array[i].length; + + ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), &sqp->ud_header); + + sqp->ud_header.lrh.service_level = + (u8)(be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28); + sqp->ud_header.lrh.destination_lid = ah->av.dlid; + sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.g_slid & 0x7f); + if (mlx4_ib_ah_grh_present(ah)) { + sqp->ud_header.grh.traffic_class = + (u8)((be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff); + sqp->ud_header.grh.flow_label = + ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff); + sqp->ud_header.grh.hop_limit = ah->av.hop_limit; + ib_get_cached_gid(ib_dev, (u8)(be32_to_cpu(ah->av.port_pd) >> 24), + ah->av.gid_index, &sqp->ud_header.grh.source_gid); + memcpy(sqp->ud_header.grh.destination_gid.raw, + ah->av.dgid, 16); + } + + mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); + mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) | + (sqp->ud_header.lrh.destination_lid == + XIB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) | + (sqp->ud_header.lrh.service_level << 8)); + mlx->rlid = sqp->ud_header.lrh.destination_lid; + + switch (opcode) { + case IB_WR_SEND: + sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY; + sqp->ud_header.immediate_present = 0; + break; + case IB_WR_SEND_WITH_IMM: + sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; + sqp->ud_header.immediate_present = 1; + sqp->ud_header.immediate_data = wr->immediate_data; + break; + default: + return -EINVAL; + } + + sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 
15 : 0; + if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) + sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; + sqp->ud_header.bth.solicited_event = (u8)(!!(wr->send_opt & IB_SEND_OPT_SOLICITED)); + if (!sqp->qp.ibqp.qp_num) + ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey); + else + ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->dgrm.ud.pkey_index, &pkey); + sqp->ud_header.bth.pkey = cpu_to_be16(pkey); + sqp->ud_header.bth.destination_qpn = wr->dgrm.ud.remote_qp; + sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); + sqp->ud_header.deth.qkey = wr->dgrm.ud.remote_qkey & 0x00000080 ? + cpu_to_be32(sqp->qkey) : wr->dgrm.ud.remote_qkey; + sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num); + + header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf); + +#if 0 + { + printk(KERN_ERR "built UD header of size %d:\n", header_size); + for (i = 0; i < header_size / 4; ++i) { + if (i % 8 == 0) + printk(" [%02x] ", i * 4); + printk(" %08x", + be32_to_cpu(((__be32 *) sqp->header_buf)[i])); + if ((i + 1) % 8 == 0) + printk("\n"); + } + printk("\n"); + } +#endif + + /* + * Inline data segments may not cross a 64 byte boundary. If + * our UD header is bigger than the space available up to the + * next 64 byte boundary in the WQE, use two inline data + * segments to hold the UD header. + */ + spc = MLX4_INLINE_ALIGN - + ((u32)(ULONG_PTR)(inl + 1) & (MLX4_INLINE_ALIGN - 1)); + if (header_size <= spc) { + inl->byte_count = cpu_to_be32(1 << 31 | header_size); + memcpy(inl + 1, sqp->header_buf, header_size); + i = 1; + } else { + inl->byte_count = cpu_to_be32(1 << 31 | spc); + memcpy(inl + 1, sqp->header_buf, spc); + + inl = (void*)((u8*)(inl + 1) + spc); + memcpy(inl + 1, sqp->header_buf + spc, header_size - spc); + /* + * Need a barrier here to make sure all the data is + * visible before the byte_count field is set. + * Otherwise the HCA prefetcher could grab the 64-byte + * chunk with this inline segment and get a valid (!= + * 0xffffffff) byte count but stale data, and end up + * generating a packet with bad headers. + * + * The first inline segment's byte_count field doesn't + * need a barrier, because it comes after a + * control/MLX segment and therefore is at an offset + * of 16 mod 64. 
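+ *
+ * As an illustrative worked example (header sizes are the standard
+ * IBA values, not taken from this patch): with MLX4_INLINE_ALIGN of
+ * 64 (the 64-byte boundary mentioned above), the control/MLX segment
+ * takes 16 bytes and the first inline segment header another 4, so
+ * spc = 64 - 20 = 44 bytes remain before the boundary.  A UD header
+ * without a GRH (LRH 8 + BTH 12 + DETH 8 = 28 bytes) fits in a single
+ * inline segment, while a header with a 40-byte GRH (68 bytes in all)
+ * is split 44 + 24 across two segments.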
+ */ + wmb(); + inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc)); + i = 2; + } + + return ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16); +} + +static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq) +{ + unsigned cur; + struct mlx4_ib_cq *cq; + + cur = wq->head - wq->tail; + if (likely((int)cur + nreq < wq->max_post)) + return 0; + + cq = to_mcq(ib_cq); + spin_lock(&cq->lock); + cur = wq->head - wq->tail; + spin_unlock(&cq->lock); + + return (int)cur + nreq >= wq->max_post; +} + +static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg, + u64 remote_addr, __be32 rkey) +{ + rseg->raddr = cpu_to_be64(remote_addr); + rseg->rkey = rkey; + rseg->reserved = 0; +} + +static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, ib_send_wr_t *wr) +{ + if (wr->wr_type == WR_COMPARE_SWAP) { + aseg->swap_add = wr->remote_ops.atomic2; + aseg->compare = wr->remote_ops.atomic1; + } else { + aseg->swap_add = wr->remote_ops.atomic1; + aseg->compare = 0; + } + +} + +static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, + ib_send_wr_t *wr) +{ + memcpy(dseg->av, &to_mah((struct ib_ah *)wr->dgrm.ud.h_av)->av, sizeof (struct mlx4_av)); + dseg->dqpn = wr->dgrm.ud.remote_qp; + dseg->qkey = wr->dgrm.ud.remote_qkey; +} + +static void set_mlx_icrc_seg(void *dseg) +{ + u32 *t = dseg; + struct mlx4_wqe_inline_seg *iseg = dseg; + + t[1] = 0; + + /* + * Need a barrier here before writing the byte_count field to + * make sure that all the data is visible before the + * byte_count field is set. Otherwise, if the segment begins + * a new cacheline, the HCA prefetcher could grab the 64-byte + * chunk and get a valid (!= * 0xffffffff) byte count but + * stale data, and end up sending the wrong data. + */ + wmb(); + + iseg->byte_count = cpu_to_be32((1 << 31) | 4); +} + +static void set_data_seg(struct mlx4_wqe_data_seg *dseg, ib_local_ds_t *sg) +{ + dseg->lkey = cpu_to_be32(sg->lkey); + dseg->addr = cpu_to_be64(sg->vaddr); + + /* + * Need a barrier here before writing the byte_count field to + * make sure that all the data is visible before the + * byte_count field is set. Otherwise, if the segment begins + * a new cacheline, the HCA prefetcher could grab the 64-byte + * chunk and get a valid (!= * 0xffffffff) byte count but + * stale data, and end up sending the wrong data. + */ + wmb(); + + dseg->byte_count = cpu_to_be32(sg->length); +} + +static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, ib_local_ds_t *sg) +{ + dseg->byte_count = cpu_to_be32(sg->length); + dseg->lkey = cpu_to_be32(sg->lkey); + dseg->addr = cpu_to_be64(sg->vaddr); +} + +int mlx4_ib_post_send(struct ib_qp *ibqp, ib_send_wr_t *wr, + ib_send_wr_t **bad_wr) +{ + enum ib_wr_opcode opcode; + struct mlx4_ib_qp *qp = to_mqp(ibqp); + u8 *wqe; + struct mlx4_wqe_ctrl_seg *ctrl; + struct mlx4_wqe_data_seg *dseg; + unsigned long flags; + int nreq; + int err = 0; + int ind; + int size; + int i; + + spin_lock_irqsave(&qp->sq.lock, &flags); + + ind = qp->sq.head; + + for (nreq = 0; wr; ++nreq, wr = wr->p_next) { + if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + if (unlikely(wr->num_ds > (u32)qp->sq.max_gs)) { + err = -EINVAL; + *bad_wr = wr; + goto out; + } + + wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1)); + ctrl = (void*)wqe; + qp->sq.wrid[ind & (qp->sq.wqe_cnt - 1)] = wr->wr_id; + opcode = to_wr_opcode(wr); + + ctrl->srcrb_flags = + (wr->send_opt & IB_SEND_OPT_SIGNALED ? 
+ cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) | + (wr->send_opt & IB_SEND_OPT_SOLICITED ? + cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0) | + qp->sq_signal_bits; + + if (opcode == IB_WR_SEND_WITH_IMM || + opcode == IB_WR_RDMA_WRITE_WITH_IMM) + ctrl->imm = wr->immediate_data; + else + ctrl->imm = 0; + + wqe += sizeof *ctrl; + size = sizeof *ctrl / 16; + + switch (ibqp->qp_type) { + case IB_QPT_RC: + case IB_QPT_UC: + switch (opcode) { + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + set_raddr_seg((void*)wqe, wr->remote_ops.vaddr, + wr->remote_ops.rkey); + wqe += sizeof (struct mlx4_wqe_raddr_seg); + + set_atomic_seg((void*)wqe, wr); + wqe += sizeof (struct mlx4_wqe_atomic_seg); + + size += (sizeof (struct mlx4_wqe_raddr_seg) + + sizeof (struct mlx4_wqe_atomic_seg)) / 16; + + break; + + case IB_WR_RDMA_READ: + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + set_raddr_seg((void*)wqe, wr->remote_ops.vaddr, + wr->remote_ops.rkey); + wqe += sizeof (struct mlx4_wqe_raddr_seg); + size += sizeof (struct mlx4_wqe_raddr_seg) / 16; + break; + + default: + /* No extra segments required for sends */ + break; + } + break; + + case IB_QPT_UD: + set_datagram_seg((void*)wqe, wr); + wqe += sizeof (struct mlx4_wqe_datagram_seg); + size += sizeof (struct mlx4_wqe_datagram_seg) / 16; + break; + + case IB_QPT_SMI: + case IB_QPT_GSI: + err = build_mlx_header(to_msqp(qp), wr, ctrl); + if (err < 0) { + *bad_wr = wr; + goto out; + } + wqe += err; + size += err / 16; + + err = 0; + break; + + default: + break; + } + + /* + * Write data segments in reverse order, so as to + * overwrite cacheline stamp last within each + * cacheline. This avoids issues with WQE + * prefetching. + */ + + dseg = (void*)wqe; + dseg += wr->num_ds - 1; + size += wr->num_ds * (sizeof (struct mlx4_wqe_data_seg) / 16); + + /* Add one more inline data segment for ICRC for MLX sends */ + if (unlikely(qp->ibqp.qp_type == IB_QPT_SMI || + qp->ibqp.qp_type == IB_QPT_GSI)) { + set_mlx_icrc_seg(dseg + 1); + size += sizeof (struct mlx4_wqe_data_seg) / 16; + } + + for (i = wr->num_ds - 1; i >= 0; --i, --dseg) + set_data_seg(dseg, wr->ds_array + i); + + ctrl->fence_size = (u8)((wr->send_opt & IB_SEND_OPT_FENCE ? + MLX4_WQE_CTRL_FENCE : 0) | size); + + /* + * Make sure descriptor is fully written before + * setting ownership bit (because HW can start + * executing as soon as we do). + */ + wmb(); + + if (opcode < 0 || opcode >= ARRAY_SIZE(mlx4_ib_opcode)) { + err = -EINVAL; + goto out; + } + + ctrl->owner_opcode = mlx4_ib_opcode[opcode] | + (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0); + + /* + * We can improve latency by not stamping the last + * send queue WQE until after ringing the doorbell, so + * only stamp here if there are still more WQEs to post. + */ + if (wr->p_next) + stamp_send_wqe(qp, (ind + qp->sq_spare_wqes) & + (qp->sq.wqe_cnt - 1)); + + ++ind; + } + +out: + if (likely(nreq)) { + qp->sq.head += nreq; + + /* + * Make sure that descriptors are written before + * doorbell record. + */ + wmb(); + + writel(qp->doorbell_qpn, + (u8*)to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL); + +#if 0 + if (qp->mqp.qpn == 0x41) + DbgPrint( "[MLX4_BUS] mlx4_ib_post_send : qtype %d, qpn %#x, nreq %d, sq.head %#x, wqe_ix %d, db %p \n", + ibqp->qp_type, qp->mqp.qpn, nreq, qp->sq.head, ind, + (u8*)to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL ); +#endif + /* + * Make sure doorbells don't leak out of SQ spinlock + * and reach the HCA out of order. 
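+ * (mmiowb() is needed because on some platforms posted MMIO writes
+ * issued by different CPUs can be reordered by the I/O chipset even
+ * though each write was performed while holding the spinlock; the
+ * barrier pushes the doorbell write out before the lock is dropped.)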
+ */ + mmiowb(); + + stamp_send_wqe(qp, (ind + qp->sq_spare_wqes - 1) & + (qp->sq.wqe_cnt - 1)); + } + + spin_unlock_irqrestore(&qp->sq.lock, flags); + + return err; +} + +int mlx4_ib_post_recv(struct ib_qp *ibqp, ib_recv_wr_t *wr, + ib_recv_wr_t **bad_wr) +{ + struct mlx4_ib_qp *qp = to_mqp(ibqp); + struct mlx4_wqe_data_seg *scat; + unsigned long flags; + int err = 0; + int nreq; + int ind; + int i; + + spin_lock_irqsave(&qp->rq.lock, &flags); + + ind = qp->rq.head & (qp->rq.wqe_cnt - 1); + + for (nreq = 0; wr; ++nreq, wr = wr->p_next) { + if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.send_cq)) { + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + if (unlikely(wr->num_ds > (u32)qp->rq.max_gs)) { + err = -EINVAL; + *bad_wr = wr; + goto out; + } + + scat = get_recv_wqe(qp, ind); + + for (i = 0; i < (int)wr->num_ds; ++i) + __set_data_seg(scat + i, wr->ds_array + i); + + if (i < qp->rq.max_gs) { + scat[i].byte_count = 0; + scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY); + scat[i].addr = 0; + } + + qp->rq.wrid[ind] = wr->wr_id; + + ind = (ind + 1) & (qp->rq.wqe_cnt - 1); + } + +out: + if (likely(nreq)) { + qp->rq.head += nreq; + + /* + * Make sure that descriptors are written before + * doorbell record. + */ + wmb(); + + *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff); + +#if 0 + if (qp->mqp.qpn == 0x41) + DbgPrint( "[MLX4_BUS] mlx4_ib_post_recv : qtype %d, qpn %#x, nreq %d, rq.head %#x, wqe_ix %d, db_obj %p, db %p \n", + ibqp->qp_type, qp->mqp.qpn, nreq, qp->rq.head, ind, &qp->db, qp->db.db ); +#endif + } + + spin_unlock_irqrestore(&qp->rq.lock, flags); + + return err; +} + +static inline enum ib_qp_state to_ib_qp_state(enum mlx4_qp_state mlx4_state) +{ + switch (mlx4_state) { + case MLX4_QP_STATE_RST: return XIB_QPS_RESET; + case MLX4_QP_STATE_INIT: return XIB_QPS_INIT; + case MLX4_QP_STATE_RTR: return XIB_QPS_RTR; + case MLX4_QP_STATE_RTS: return XIB_QPS_RTS; + case MLX4_QP_STATE_SQ_DRAINING: + case MLX4_QP_STATE_SQD: return XIB_QPS_SQD; + case MLX4_QP_STATE_SQER: return XIB_QPS_SQE; + case MLX4_QP_STATE_ERR: return XIB_QPS_ERR; + default: return -1; + } +} + +static inline enum ib_mig_state to_ib_mig_state(int mlx4_mig_state) +{ + switch (mlx4_mig_state) { + case MLX4_QP_PM_ARMED: return IB_MIG_ARMED; + case MLX4_QP_PM_REARM: return IB_MIG_REARM; + case MLX4_QP_PM_MIGRATED: return IB_MIG_MIGRATED; + default: return -1; + } +} + +static int to_ib_qp_access_flags(int mlx4_flags) +{ + int ib_flags = 0; + + if (mlx4_flags & MLX4_QP_BIT_RRE) + ib_flags |= IB_ACCESS_REMOTE_READ; + if (mlx4_flags & MLX4_QP_BIT_RWE) + ib_flags |= IB_ACCESS_REMOTE_WRITE; + if (mlx4_flags & MLX4_QP_BIT_RAE) + ib_flags |= IB_ACCESS_REMOTE_ATOMIC; + + return ib_flags; +} + +static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr, + struct mlx4_qp_path *path) +{ + memset(ib_ah_attr, 0, sizeof *ib_ah_attr); + ib_ah_attr->port_num = path->sched_queue & 0x40 ? 2 : 1; + + if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports) + return; + + ib_ah_attr->dlid = be16_to_cpu(path->rlid); + ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf; + ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f; + ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0; + ib_ah_attr->ah_flags = (path->grh_mylmc & (1 << 7)) ? 
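+ * (Unlike the send path, which rings an MMIO doorbell register, the
+ * receive queue uses a doorbell record in host memory -- qp->db.db --
+ * that the HCA reads on its own, so a wmb() alone is enough and no
+ * mmiowb() is required here.)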
IB_AH_GRH : 0; + if (ib_ah_attr->ah_flags) { + ib_ah_attr->grh.sgid_index = path->mgid_index; + ib_ah_attr->grh.hop_limit = path->hop_limit; + ib_ah_attr->grh.traffic_class = + (u8)((be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff); + ib_ah_attr->grh.flow_label = + be32_to_cpu(path->tclass_flowlabel) & 0xfffff; + memcpy(ib_ah_attr->grh.dgid.raw, + path->rgid, sizeof ib_ah_attr->grh.dgid.raw); + } +} + +int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr) +{ + struct mlx4_ib_dev *dev = to_mdev(ibqp->device); + struct mlx4_ib_qp *qp = to_mqp(ibqp); + struct mlx4_qp_context context; + int mlx4_state; + int err; + + UNUSED_PARAM(qp_attr_mask); + + if (qp->state == XIB_QPS_RESET) { + qp_attr->qp_state = XIB_QPS_RESET; + goto done; + } + + err = mlx4_qp_query(dev->dev, &qp->mqp, &context); + if (err) + return -EINVAL; + + mlx4_state = be32_to_cpu(context.flags) >> 28; + + qp_attr->qp_state = to_ib_qp_state(mlx4_state); + qp_attr->path_mtu = context.mtu_msgmax >> 5; + qp_attr->path_mig_state = + to_ib_mig_state((be32_to_cpu(context.flags) >> 11) & 0x3); + qp_attr->qkey = be32_to_cpu(context.qkey); + qp_attr->rq_psn = be32_to_cpu(context.rnr_nextrecvpsn) & 0xffffff; + qp_attr->sq_psn = be32_to_cpu(context.next_send_psn) & 0xffffff; + qp_attr->dest_qp_num = be32_to_cpu(context.remote_qpn) & 0xffffff; + qp_attr->qp_access_flags = + to_ib_qp_access_flags(be32_to_cpu(context.params2)); + + if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { + to_ib_ah_attr(dev->dev, &qp_attr->ah_attr, &context.pri_path); + to_ib_ah_attr(dev->dev, &qp_attr->alt_ah_attr, &context.alt_path); + qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f; + qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num; + } + + qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f; + if (qp_attr->qp_state == XIB_QPS_INIT) + qp_attr->port_num = qp->port; + else + qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1; + + /* qp_attr->en_sqd_async_notify is only applicable in modify qp */ + qp_attr->sq_draining = (u8)(mlx4_state == MLX4_QP_STATE_SQ_DRAINING); + + qp_attr->max_rd_atomic = (u8)(1 << ((be32_to_cpu(context.params1) >> 21) & 0x7)); + + qp_attr->max_dest_rd_atomic = + (u8)(1 << ((be32_to_cpu(context.params2) >> 21) & 0x7)); + qp_attr->min_rnr_timer = + (u8)((be32_to_cpu(context.rnr_nextrecvpsn) >> 24) & 0x1f); + qp_attr->timeout = context.pri_path.ackto >> 3; + qp_attr->retry_cnt = (u8)((be32_to_cpu(context.params1) >> 16) & 0x7); + qp_attr->rnr_retry = (u8)((be32_to_cpu(context.params1) >> 13) & 0x7); + qp_attr->alt_timeout = context.alt_path.ackto >> 3; + +done: + qp_attr->cur_qp_state = qp_attr->qp_state; + qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt; + qp_attr->cap.max_recv_sge = qp->rq.max_gs; + + if (!ibqp->p_uctx) { + qp_attr->cap.max_send_wr = qp->sq.wqe_cnt; + qp_attr->cap.max_send_sge = qp->sq.max_gs; + } else { + qp_attr->cap.max_send_wr = 0; + qp_attr->cap.max_send_sge = 0; + } + + /* + * We don't support inline sends for kernel QPs (yet), and we + * don't know what userspace's value should be. + */ + qp_attr->cap.max_inline_data = 0; + + qp_init_attr->cap = qp_attr->cap; + + return 0; +} + diff --git a/branches/ConnectX/hw/mlx4/kernel/ib/srq.c b/branches/ConnectX/hw/mlx4/kernel/ib/srq.c new file mode 100644 index 00000000..192ca746 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/ib/srq.c @@ -0,0 +1,359 @@ +/* + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "mlx4_ib.h" +#include "qp.h" +#include "srq.h" +#include "user.h" + +static void *get_wqe(struct mlx4_ib_srq *srq, int n) +{ + int offset = n << srq->msrq.wqe_shift; + + if (srq->buf.nbufs == 1) + return srq->buf.u.direct.buf + offset; + else + return srq->buf.u.page_list[offset >> PAGE_SHIFT].buf + + (offset & (PAGE_SIZE - 1)); +} + +static void mlx4_ib_srq_event(struct mlx4_srq *srq, enum mlx4_event type) +{ + struct ib_event event; + struct ib_srq *ibsrq = &to_mibsrq(srq)->ibsrq; + + if (ibsrq->event_handler) { + event.device = ibsrq->device; + event.element.srq = ibsrq; + switch (type) { + case MLX4_EVENT_TYPE_SRQ_LIMIT: + event.event = IB_EVENT_SRQ_LIMIT_REACHED; + break; + case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR: + event.event = IB_EVENT_SRQ_ERR; + break; + default: + printk(KERN_WARNING "mlx4_ib: Unexpected event type %d " + "on SRQ %06x\n", type, srq->srqn); + return; + } + + ibsrq->event_handler(&event, ibsrq->srq_context); + } +} + +struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) +{ + struct mlx4_ib_dev *dev = to_mdev(pd->device); + struct mlx4_ib_srq *srq; + struct mlx4_wqe_srq_next_seg *next; + int desc_size; + int buf_size; + int err; + int i; + + /* Sanity check SRQ size before proceeding */ + if ((int)init_attr->attr.max_wr >= dev->dev->caps.max_srq_wqes || + (int)init_attr->attr.max_sge > dev->dev->caps.max_srq_sge) + return ERR_PTR(-EINVAL); + + srq = kmalloc(sizeof *srq, GFP_KERNEL); + if (!srq) + return ERR_PTR(-ENOMEM); + + mutex_init(&srq->mutex); + spin_lock_init(&srq->lock); + srq->msrq.max = roundup_pow_of_two(init_attr->attr.max_wr + 1); + srq->msrq.max_gs = init_attr->attr.max_sge; + + desc_size = max(32UL, + roundup_pow_of_two(sizeof (struct mlx4_wqe_srq_next_seg) + + srq->msrq.max_gs * + sizeof (struct mlx4_wqe_data_seg))); + srq->msrq.wqe_shift = ilog2(desc_size); + + buf_size = srq->msrq.max * desc_size; + + if (pd->p_uctx) { + struct mlx4_ib_create_srq ucmd; + + if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { + err = -EFAULT; + goto err_srq; + } + + srq->umem = ib_umem_get(pd->p_uctx, ucmd.buf_addr, + buf_size, 0); + if (IS_ERR(srq->umem)) { + err = PTR_ERR(srq->umem); 
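+			/* nothing else is set up at this point; the err_srq path just frees the srq struct */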
+ goto err_srq; + } + + err = mlx4_mtt_init(dev->dev, ib_umem_page_count(srq->umem), + ilog2(srq->umem->page_size), &srq->mtt); + if (err) + goto err_buf; + + err = mlx4_ib_umem_write_mtt(dev, &srq->mtt, srq->umem); + if (err) + goto err_mtt; + + err = mlx4_ib_db_map_user(to_mucontext(pd->p_uctx), + ucmd.db_addr, &srq->db); + if (err) + goto err_mtt; + } else { + err = mlx4_ib_db_alloc(dev, &srq->db, 0); + if (err) + goto err_srq; + + *srq->db.db = 0; + + if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf)) { + err = -ENOMEM; + goto err_db; + } + + srq->head = 0; + srq->tail = srq->msrq.max - 1; + srq->wqe_ctr = 0; + + for (i = 0; i < srq->msrq.max; ++i) { + next = get_wqe(srq, i); + next->next_wqe_index = + cpu_to_be16((i + 1) & (srq->msrq.max - 1)); + } + + err = mlx4_mtt_init(dev->dev, srq->buf.npages, srq->buf.page_shift, + &srq->mtt); + if (err) + goto err_buf; + + err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf); + if (err) + goto err_mtt; + + srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL); + if (!srq->wrid) { + err = -ENOMEM; + goto err_mtt; + } + } + + err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, &srq->mtt, + srq->db.dma.da, &srq->msrq); + if (err) + goto err_wrid; + + srq->msrq.event = mlx4_ib_srq_event; + + if (pd->p_uctx) + if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) { + err = -EFAULT; + goto err_wrid; + } + + init_attr->attr.max_wr = srq->msrq.max - 1; + + return &srq->ibsrq; + +err_wrid: + if (pd->p_uctx) + mlx4_ib_db_unmap_user(to_mucontext(pd->p_uctx), &srq->db); + else + kfree(srq->wrid); + +err_mtt: + mlx4_mtt_cleanup(dev->dev, &srq->mtt); + +err_buf: + if (pd->p_uctx) + ib_umem_release(srq->umem); + else + mlx4_buf_free(dev->dev, buf_size, &srq->buf); + +err_db: + if (!pd->p_uctx) + mlx4_ib_db_free(dev, &srq->db); + +err_srq: + kfree(srq); + + return ERR_PTR(err); +} + +int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) +{ + struct mlx4_ib_dev *dev = to_mdev(ibsrq->device); + struct mlx4_ib_srq *srq = to_msrq(ibsrq); + int ret; + + UNUSED_PARAM(udata); + + /* We don't support resizing SRQs (yet?) 
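+ * Only the limit is adjustable: XIB_SRQ_MAX_WR is rejected with
+ * -EINVAL below, while XIB_SRQ_LIMIT re-arms the limit-reached event
+ * through mlx4_srq_arm().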
*/ + if (attr_mask & XIB_SRQ_MAX_WR) + return -EINVAL; + + if (attr_mask & XIB_SRQ_LIMIT) { + if ((int)attr->srq_limit >= srq->msrq.max) + return -EINVAL; + + mutex_lock(&srq->mutex); + ret = mlx4_srq_arm(dev->dev, &srq->msrq, attr->srq_limit); + mutex_unlock(&srq->mutex); + + if (ret) + return ret; + } + + return 0; +} + +int mlx4_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) +{ + struct mlx4_ib_dev *dev = to_mdev(ibsrq->device); + struct mlx4_ib_srq *srq = to_msrq(ibsrq); + int ret; + int limit_watermark; + + ret = mlx4_srq_query(dev->dev, &srq->msrq, &limit_watermark); + if (ret) + return ret; + + srq_attr->srq_limit = limit_watermark; + srq_attr->max_wr = srq->msrq.max - 1; + srq_attr->max_sge = srq->msrq.max_gs; + + return 0; +} + +int mlx4_ib_destroy_srq(struct ib_srq *srq) +{ + struct mlx4_ib_dev *dev = to_mdev(srq->device); + struct mlx4_ib_srq *msrq = to_msrq(srq); + + mlx4_srq_free(dev->dev, &msrq->msrq); + mlx4_mtt_cleanup(dev->dev, &msrq->mtt); + + if (srq->p_uctx) { + mlx4_ib_db_unmap_user(to_mucontext(srq->p_uctx), &msrq->db); + ib_umem_release(msrq->umem); + } else { + kfree(msrq->wrid); + mlx4_buf_free(dev->dev, msrq->msrq.max << msrq->msrq.wqe_shift, + &msrq->buf); + mlx4_ib_db_free(dev, &msrq->db); + } + + kfree(msrq); + + return 0; +} + +void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index) +{ + struct mlx4_wqe_srq_next_seg *next; + + /* always called with interrupts disabled. */ + spin_lock(&srq->lock); + + next = get_wqe(srq, srq->tail); + next->next_wqe_index = cpu_to_be16(wqe_index); + srq->tail = wqe_index; + + spin_unlock(&srq->lock); +} + +int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, ib_recv_wr_t *wr, + ib_recv_wr_t **bad_wr) +{ + struct mlx4_ib_srq *srq = to_msrq(ibsrq); + struct mlx4_wqe_srq_next_seg *next; + struct mlx4_wqe_data_seg *scat; + unsigned long flags; + int err = 0; + int nreq; + int i; + + spin_lock_irqsave(&srq->lock, &flags); + + for (nreq = 0; wr; ++nreq, wr = wr->p_next) { + if (unlikely(wr->num_ds > (u32)srq->msrq.max_gs)) { + err = -EINVAL; + *bad_wr = wr; + break; + } + + if (unlikely(srq->head == srq->tail)) { + err = -ENOMEM; + *bad_wr = wr; + break; + } + + srq->wrid[srq->head] = wr->wr_id; + + next = get_wqe(srq, srq->head); + srq->head = be16_to_cpu(next->next_wqe_index); + scat = (struct mlx4_wqe_data_seg *) (next + 1); + + for (i = 0; i < (int)wr->num_ds; ++i) { + scat[i].byte_count = cpu_to_be32(wr->ds_array[i].length); + scat[i].lkey = cpu_to_be32(wr->ds_array[i].lkey); + scat[i].addr = cpu_to_be64(wr->ds_array[i].vaddr); + } + + if (i < srq->msrq.max_gs) { + scat[i].byte_count = 0; + scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY); + scat[i].addr = 0; + } + } + + if (likely(nreq)) { + srq->wqe_ctr = (u16)(srq->wqe_ctr + nreq); + + /* + * Make sure that descriptors are written before + * doorbell record. 
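+ * (srq->wqe_ctr is a running 16-bit count of WQEs posted to the SRQ;
+ * the doorbell record in host memory is simply updated to that value,
+ * so, as for the RQ, no MMIO doorbell write is involved.)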
+ */ + wmb(); + + *srq->db.db = cpu_to_be32(srq->wqe_ctr); + } + + spin_unlock_irqrestore(&srq->lock, flags); + + return err; +} diff --git a/branches/ConnectX/hw/mlx4/kernel/inc/bus_intf.h b/branches/ConnectX/hw/mlx4/kernel/inc/bus_intf.h new file mode 100644 index 00000000..52db31e2 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/inc/bus_intf.h @@ -0,0 +1,17 @@ +#pragma once + +#define MLX4_BUS_IB_INTERFACE_VERSION 1 + +// +// Interface for work with MLX4 IB driver +// +#pragma warning(disable:4201) // nameless struct/union +typedef struct _MLX4_BUS_IB_INTERFACE{ + INTERFACE; + struct ib_device * p_ibdev; + struct pci_dev * pdev; + int is_livefish; + +} MLX4_BUS_IB_INTERFACE, *PMLX4_BUS_IB_INTERFACE; +#pragma warning(default:4201) // nameless struct/union + diff --git a/branches/ConnectX/hw/mlx4/kernel/inc/cmd.h b/branches/ConnectX/hw/mlx4/kernel/inc/cmd.h new file mode 100644 index 00000000..90e30dbe --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/inc/cmd.h @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef MLX4_CMD_H +#define MLX4_CMD_H + +enum { + /* initialization and general commands */ + MLX4_CMD_SYS_EN = 0x1, + MLX4_CMD_SYS_DIS = 0x2, + MLX4_CMD_MAP_FA = 0xfff, + MLX4_CMD_UNMAP_FA = 0xffe, + MLX4_CMD_RUN_FW = 0xff6, + MLX4_CMD_MOD_STAT_CFG = 0x34, + MLX4_CMD_QUERY_DEV_CAP = 0x3, + MLX4_CMD_QUERY_FW = 0x4, + MLX4_CMD_ENABLE_LAM = 0xff8, + MLX4_CMD_DISABLE_LAM = 0xff7, + MLX4_CMD_QUERY_DDR = 0x5, + MLX4_CMD_QUERY_ADAPTER = 0x6, + MLX4_CMD_INIT_HCA = 0x7, + MLX4_CMD_CLOSE_HCA = 0x8, + MLX4_CMD_INIT_PORT = 0x9, + MLX4_CMD_CLOSE_PORT = 0xa, + MLX4_CMD_QUERY_HCA = 0xb, + MLX4_CMD_QUERY_PORT = 0x43, + MLX4_CMD_SET_PORT = 0xc, + MLX4_CMD_ACCESS_DDR = 0x2e, + MLX4_CMD_MAP_ICM = 0xffa, + MLX4_CMD_UNMAP_ICM = 0xff9, + MLX4_CMD_MAP_ICM_AUX = 0xffc, + MLX4_CMD_UNMAP_ICM_AUX = 0xffb, + MLX4_CMD_SET_ICM_SIZE = 0xffd, + + /* TPT commands */ + MLX4_CMD_SW2HW_MPT = 0xd, + MLX4_CMD_QUERY_MPT = 0xe, + MLX4_CMD_HW2SW_MPT = 0xf, + MLX4_CMD_READ_MTT = 0x10, + MLX4_CMD_WRITE_MTT = 0x11, + MLX4_CMD_SYNC_TPT = 0x2f, + + /* EQ commands */ + MLX4_CMD_MAP_EQ = 0x12, + MLX4_CMD_SW2HW_EQ = 0x13, + MLX4_CMD_HW2SW_EQ = 0x14, + MLX4_CMD_QUERY_EQ = 0x15, + + /* CQ commands */ + MLX4_CMD_SW2HW_CQ = 0x16, + MLX4_CMD_HW2SW_CQ = 0x17, + MLX4_CMD_QUERY_CQ = 0x18, + MLX4_CMD_MODIFY_CQ = 0x2c, + + /* SRQ commands */ + MLX4_CMD_SW2HW_SRQ = 0x35, + MLX4_CMD_HW2SW_SRQ = 0x36, + MLX4_CMD_QUERY_SRQ = 0x37, + MLX4_CMD_ARM_SRQ = 0x40, + + /* QP/EE commands */ + MLX4_CMD_RST2INIT_QP = 0x19, + MLX4_CMD_INIT2RTR_QP = 0x1a, + MLX4_CMD_RTR2RTS_QP = 0x1b, + MLX4_CMD_RTS2RTS_QP = 0x1c, + MLX4_CMD_SQERR2RTS_QP = 0x1d, + MLX4_CMD_2ERR_QP = 0x1e, + MLX4_CMD_RTS2SQD_QP = 0x1f, + MLX4_CMD_SQD2SQD_QP = 0x38, + MLX4_CMD_SQD2RTS_QP = 0x20, + MLX4_CMD_2RST_QP = 0x21, + MLX4_CMD_QUERY_QP = 0x22, + MLX4_CMD_INIT2INIT_QP = 0x2d, + MLX4_CMD_SUSPEND_QP = 0x32, + MLX4_CMD_UNSUSPEND_QP = 0x33, + /* special QP and management commands */ + MLX4_CMD_CONF_SPECIAL_QP = 0x23, + MLX4_CMD_MAD_IFC = 0x24, + + /* multicast commands */ + MLX4_CMD_READ_MCG = 0x25, + MLX4_CMD_WRITE_MCG = 0x26, + MLX4_CMD_MGID_HASH = 0x27, + + /* miscellaneous commands */ + MLX4_CMD_DIAG_RPRT = 0x30, + MLX4_CMD_NOP = 0x31, + + /* debug commands */ + MLX4_CMD_QUERY_DEBUG_MSG = 0x2a, + MLX4_CMD_SET_DEBUG_MSG = 0x2b, +}; + +enum { + MLX4_CMD_TIME_CLASS_A = 10000, + MLX4_CMD_TIME_CLASS_B = 10000, + MLX4_CMD_TIME_CLASS_C = 10000, +}; + +enum { + MLX4_MAILBOX_SIZE = 4096 +}; + +struct mlx4_dev; + +struct mlx4_cmd_mailbox { + void *buf; + dma_addr_t dma; +}; + +int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, + int out_is_imm, u32 in_modifier, u8 op_modifier, + u16 op, unsigned long timeout); + +/* Invoke a command with no output parameter */ +static inline int mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u32 in_modifier, + u8 op_modifier, u16 op, unsigned long timeout) +{ + return __mlx4_cmd(dev, in_param, NULL, 0, in_modifier, + op_modifier, op, timeout); +} + +/* Invoke a command with an output mailbox */ +static inline int mlx4_cmd_box(struct mlx4_dev *dev, u64 in_param, u64 out_param, + u32 in_modifier, u8 op_modifier, u16 op, + unsigned long timeout) +{ + return __mlx4_cmd(dev, in_param, &out_param, 0, in_modifier, + op_modifier, op, timeout); +} + +/* + * Invoke a command with an immediate output parameter (and copy the + * output into the caller's out_param pointer after the command + * executes). 
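+ *
+ * A minimal usage sketch (the opcode and variable names here are only
+ * an illustration of the calling convention):
+ *
+ *	u64 aux_pages;
+ *	int err = mlx4_cmd_imm(dev, icm_size, &aux_pages, 0, 0,
+ *			       MLX4_CMD_SET_ICM_SIZE,
+ *			       MLX4_CMD_TIME_CLASS_A);
+ *
+ * On success, aux_pages holds the immediate value returned by the
+ * firmware.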
+ */ +static inline int mlx4_cmd_imm(struct mlx4_dev *dev, u64 in_param, u64 *out_param, + u32 in_modifier, u8 op_modifier, u16 op, + unsigned long timeout) +{ + return __mlx4_cmd(dev, in_param, out_param, 1, in_modifier, + op_modifier, op, timeout); +} + +struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev); +void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox); + +#endif /* MLX4_CMD_H */ diff --git a/branches/ConnectX/hw/mlx4/kernel/inc/cq.h b/branches/ConnectX/hw/mlx4/kernel/inc/cq.h new file mode 100644 index 00000000..5d25370f --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/inc/cq.h @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef MLX4_CQ_H +#define MLX4_CQ_H + +#include "device.h" +#include "doorbell.h" + +struct mlx4_cq_context { + __be32 flags; + u16 reserved1[3]; + __be16 page_offset; + __be32 logsize_usrpage; + u16 cq_period; + u16 cq_max_count; + u8 reserved4[3]; + u8 comp_eqn; + u8 log_page_size; + u8 reserved5[2]; + u8 mtt_base_addr_h; + __be32 mtt_base_addr_l; + __be32 last_notified_index; + __be32 solicit_producer_index; + __be32 consumer_index; + __be32 producer_index; + u32 reserved6[2]; + __be64 db_rec_addr; +}; + +struct mlx4_cqe { + __be32 my_qpn; + __be32 immed_rss_invalid; + __be32 g_mlpath_rqpn; + u8 sl; + u8 reserved1; + __be16 rlid; + u32 reserved2; + __be32 byte_cnt; + __be16 wqe_index; + __be16 checksum; + u8 reserved3[3]; + u8 owner_sr_opcode; +}; + +struct mlx4_err_cqe { + __be32 my_qpn; + u32 reserved1[5]; + __be16 wqe_index; + u8 vendor_err_syndrome; + u8 syndrome; + u8 reserved2[3]; + u8 owner_sr_opcode; +}; + +enum { + MLX4_CQE_OWNER_MASK = 0x80, + MLX4_CQE_IS_SEND_MASK = 0x40, + MLX4_CQE_OPCODE_MASK = 0x1f +}; + +enum { + MLX4_CQE_SYNDROME_LOCAL_LENGTH_ERR = 0x01, + MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR = 0x02, + MLX4_CQE_SYNDROME_LOCAL_PROT_ERR = 0x04, + MLX4_CQE_SYNDROME_WR_FLUSH_ERR = 0x05, + MLX4_CQE_SYNDROME_MW_BIND_ERR = 0x06, + MLX4_CQE_SYNDROME_BAD_RESP_ERR = 0x10, + MLX4_CQE_SYNDROME_LOCAL_ACCESS_ERR = 0x11, + MLX4_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR = 0x12, + MLX4_CQE_SYNDROME_REMOTE_ACCESS_ERR = 0x13, + MLX4_CQE_SYNDROME_REMOTE_OP_ERR = 0x14, + MLX4_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR = 0x15, + MLX4_CQE_SYNDROME_RNR_RETRY_EXC_ERR = 0x16, + MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR = 0x22, +}; + +static inline void mlx4_cq_arm(struct mlx4_cq *cq, u32 cmd, + void __iomem *uar_page, + spinlock_t *doorbell_lock) +{ + __be32 doorbell[2]; + u32 sn; + u32 ci; + + sn = cq->arm_sn & 3; + ci = cq->cons_index & 0xffffff; + + *cq->arm_db = cpu_to_be32(sn << 28 | cmd | ci); + + /* + * Make sure that the doorbell record in host memory is + * written before ringing the doorbell via PCI MMIO. + */ + wmb(); + + doorbell[0] = cpu_to_be32(sn << 28 | cmd | cq->cqn); + doorbell[1] = cpu_to_be32(ci); + + mlx4_write64(doorbell, (u8*)uar_page + MLX4_CQ_DOORBELL, doorbell_lock); +} + +static inline void mlx4_cq_set_ci(struct mlx4_cq *cq) +{ + *cq->set_ci_db = cpu_to_be32(cq->cons_index & 0xffffff); +} + +enum { + MLX4_CQ_DB_REQ_NOT_SOL = 1 << 24, + MLX4_CQ_DB_REQ_NOT = 2 << 24 +}; + +int mlx4_cq_modify(struct mlx4_dev *dev, struct mlx4_cq *cq, + struct mlx4_cq_context *context, int resize); + +#endif /* MLX4_CQ_H */ diff --git a/branches/ConnectX/hw/mlx4/kernel/inc/device.h b/branches/ConnectX/hw/mlx4/kernel/inc/device.h new file mode 100644 index 00000000..36f64dee --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/inc/device.h @@ -0,0 +1,360 @@ +/* + * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX4_DEVICE_H +#define MLX4_DEVICE_H + +enum { + MLX4_FLAG_MSI_X = 1 << 0, + MLX4_FLAG_OLD_PORT_CMDS = 1 << 1, + MLX4_FLAG_LIVEFISH = 1 << 10 +}; + +enum { + MLX4_MAX_PORTS = 2 +}; + +enum { + MLX4_BOARD_ID_LEN = 64 +}; + +enum { + MLX4_DEV_CAP_FLAG_RC = 1 << 0, + MLX4_DEV_CAP_FLAG_UC = 1 << 1, + MLX4_DEV_CAP_FLAG_UD = 1 << 2, + MLX4_DEV_CAP_FLAG_SRQ = 1 << 6, + MLX4_DEV_CAP_FLAG_IPOIB_CSUM = 1 << 7, + MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1 << 8, + MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1 << 9, + MLX4_DEV_CAP_FLAG_MEM_WINDOW = 1 << 16, + MLX4_DEV_CAP_FLAG_APM = 1 << 17, + MLX4_DEV_CAP_FLAG_ATOMIC = 1 << 18, + MLX4_DEV_CAP_FLAG_RAW_MCAST = 1 << 19, + MLX4_DEV_CAP_FLAG_UD_AV_PORT = 1 << 20, + MLX4_DEV_CAP_FLAG_UD_MCAST = 1 << 21 +}; + +enum mlx4_event { + MLX4_EVENT_TYPE_COMP = 0x00, + MLX4_EVENT_TYPE_PATH_MIG = 0x01, + MLX4_EVENT_TYPE_COMM_EST = 0x02, + MLX4_EVENT_TYPE_SQ_DRAINED = 0x03, + MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE = 0x13, + MLX4_EVENT_TYPE_SRQ_LIMIT = 0x14, + MLX4_EVENT_TYPE_CQ_ERROR = 0x04, + MLX4_EVENT_TYPE_WQ_CATAS_ERROR = 0x05, + MLX4_EVENT_TYPE_EEC_CATAS_ERROR = 0x06, + MLX4_EVENT_TYPE_PATH_MIG_FAILED = 0x07, + MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR = 0x10, + MLX4_EVENT_TYPE_WQ_ACCESS_ERROR = 0x11, + MLX4_EVENT_TYPE_SRQ_CATAS_ERROR = 0x12, + MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR = 0x08, + MLX4_EVENT_TYPE_PORT_CHANGE = 0x09, + MLX4_EVENT_TYPE_EQ_OVERFLOW = 0x0f, + MLX4_EVENT_TYPE_ECC_DETECT = 0x0e, + MLX4_EVENT_TYPE_CMD = 0x0a +}; + +enum { + MLX4_PORT_CHANGE_SUBTYPE_DOWN = 1, + MLX4_PORT_CHANGE_SUBTYPE_ACTIVE = 4 +}; + +enum { + MLX4_PERM_LOCAL_READ = 1 << 10, + MLX4_PERM_LOCAL_WRITE = 1 << 11, + MLX4_PERM_REMOTE_READ = 1 << 12, + MLX4_PERM_REMOTE_WRITE = 1 << 13, + MLX4_PERM_ATOMIC = 1 << 14 +}; + +enum { + MLX4_OPCODE_NOP = 0x00, + MLX4_OPCODE_SEND_INVAL = 0x01, + MLX4_OPCODE_RDMA_WRITE = 0x08, + MLX4_OPCODE_RDMA_WRITE_IMM = 0x09, + MLX4_OPCODE_SEND = 0x0a, + MLX4_OPCODE_SEND_IMM = 0x0b, + MLX4_OPCODE_LSO = 0x0e, + MLX4_OPCODE_RDMA_READ = 0x10, + MLX4_OPCODE_ATOMIC_CS = 0x11, + MLX4_OPCODE_ATOMIC_FA = 0x12, + MLX4_OPCODE_ATOMIC_MASK_CS = 0x14, + MLX4_OPCODE_ATOMIC_MASK_FA = 0x15, + MLX4_OPCODE_BIND_MW = 0x18, + MLX4_OPCODE_FMR = 0x19, + MLX4_OPCODE_LOCAL_INVAL = 0x1b, + MLX4_OPCODE_CONFIG_CMD = 0x1f, + + MLX4_RECV_OPCODE_RDMA_WRITE_IMM = 0x00, + MLX4_RECV_OPCODE_SEND = 0x01, + MLX4_RECV_OPCODE_SEND_IMM = 0x02, + MLX4_RECV_OPCODE_SEND_INVAL = 0x03, + + MLX4_CQE_OPCODE_ERROR = 0x1e, + MLX4_CQE_OPCODE_RESIZE = 0x16, +}; + +enum { + MLX4_STAT_RATE_OFFSET = 5 +}; + +struct mlx4_caps { + u64 fw_ver; + int num_ports; + int vl_cap[MLX4_MAX_PORTS + 1]; + int mtu_cap[MLX4_MAX_PORTS + 1]; + int gid_table_len[MLX4_MAX_PORTS + 1]; + int pkey_table_len[MLX4_MAX_PORTS + 1]; + int local_ca_ack_delay; + int num_uars; + int bf_reg_size; + int bf_regs_per_page; + int max_sq_sg; + int 
max_rq_sg; + int num_qps; + int max_wqes; + int max_sq_desc_sz; + int max_rq_desc_sz; + int max_qp_init_rdma; + int max_qp_dest_rdma; + int reserved_qps; + int sqp_start; + int num_srqs; + int max_srq_wqes; + int max_srq_sge; + int reserved_srqs; + int num_cqs; + int max_cqes; + int reserved_cqs; + int num_eqs; + int reserved_eqs; + int num_mpts; + int num_mtt_segs; + int fmr_reserved_mtts; + int reserved_mtts; + int reserved_mrws; + int reserved_uars; + int num_mgms; + int num_amgms; + int reserved_mcgs; + int num_qp_per_mgm; + int num_pds; + int reserved_pds; + int mtt_entry_sz; + u32 max_msg_sz; + u32 page_size_cap; + u32 flags; + u16 stat_rate_support; + u8 port_width_cap[MLX4_MAX_PORTS + 1]; +}; + +struct mlx4_buf_list { + u8 *buf; + dma_addr_t map; +}; + +struct mlx4_buf { + union { + struct mlx4_buf_list direct; + struct mlx4_buf_list *page_list; + } u; + int nbufs; + int npages; + int page_shift; +}; + +struct mlx4_mtt { + u32 first_seg; + int order; + int page_shift; +}; + +struct mlx4_mr { + struct mlx4_mtt mtt; + u64 iova; + u64 size; + u32 key; + u32 pd; + u32 access; + int enabled; +}; + +struct mlx4_fmr { + struct mlx4_mr mr; + struct mlx4_mpt_entry *mpt; + __be64 *mtts; + dma_addr_t dma_handle; + int max_pages; + int max_maps; + int maps; + u8 page_shift; +}; + +struct mlx4_uar { + unsigned long pfn; + int index; +}; + +struct mlx4_cq { + void (*comp) (struct mlx4_cq *); + void (*event) (struct mlx4_cq *, enum mlx4_event); + + struct mlx4_uar *uar; + + u32 cons_index; + + __be32 *set_ci_db; + __be32 *arm_db; + int arm_sn; + + int cqn; + + atomic_t refcount; + struct completion free; + + // Windows specific + int *p_u_arm_sn; + PMDL mdl; +}; + +struct mlx4_qp { + void (*event) (struct mlx4_qp *, enum mlx4_event); + + int qpn; + + atomic_t refcount; + struct completion free; +}; + +struct mlx4_srq { + void (*event) (struct mlx4_srq *, enum mlx4_event); + + int srqn; + int max; + int max_gs; + int wqe_shift; + + atomic_t refcount; + struct completion free; +}; + +struct mlx4_av { + __be32 port_pd; + u8 reserved1; + u8 g_slid; + __be16 dlid; + u8 reserved2; + u8 gid_index; + u8 stat_rate; + u8 hop_limit; + __be32 sl_tclass_flowlabel; + u8 dgid[16]; +}; + +struct mlx4_dev { + struct pci_dev *pdev; + unsigned long flags; + struct mlx4_caps caps; + struct radix_tree_root qp_table_tree; + u32 rev_id; + char board_id[MLX4_BOARD_ID_LEN]; +}; + +struct mlx4_init_port_param { + int set_guid0; + int set_node_guid; + int set_si_guid; + u16 mtu; + int port_width_cap; + u16 vl_cap; + u16 max_gid; + u16 max_pkey; + u64 guid0; + u64 node_guid; + u64 si_guid; +}; + +int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, + struct mlx4_buf *buf); +void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf); + +int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn); +void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn); + +int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar); +void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar); + +int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift, + struct mlx4_mtt *mtt); +void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt); +u64 mlx4_mtt_addr(struct mlx4_dev *dev, struct mlx4_mtt *mtt); + +int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access, + int npages, int page_shift, struct mlx4_mr *mr); +void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr); +int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr); +int mlx4_write_mtt(struct mlx4_dev *dev, struct 
mlx4_mtt *mtt, + int start_index, int npages, u64 *page_list); +int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, + struct mlx4_buf *buf); + +int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, + struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq); +void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq); + +int mlx4_qp_alloc(struct mlx4_dev *dev, int sqpn, struct mlx4_qp *qp); +void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp); + +int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, struct mlx4_mtt *mtt, + u64 db_rec, struct mlx4_srq *srq); +void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq); +int mlx4_srq_arm(struct mlx4_dev *dev, struct mlx4_srq *srq, int limit_watermark); +int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq, int *limit_watermark); + +int mlx4_INIT_PORT(struct mlx4_dev *dev, int port); +int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port); + +int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]); +int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]); + +int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list, + int npages, u64 iova, u32 *lkey, u32 *rkey); +int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages, + int max_maps, u8 page_shift, struct mlx4_fmr *fmr); +int mlx4_fmr_enable(struct mlx4_dev *dev, struct mlx4_fmr *fmr); +void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr, + u32 *lkey, u32 *rkey); +int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr); +int mlx4_SYNC_TPT(struct mlx4_dev *dev); + +#endif /* MLX4_DEVICE_H */ diff --git a/branches/ConnectX/hw/mlx4/kernel/inc/doorbell.h b/branches/ConnectX/hw/mlx4/kernel/inc/doorbell.h new file mode 100644 index 00000000..2a00c154 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/inc/doorbell.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef MLX4_DOORBELL_H +#define MLX4_DOORBELL_H + +#define MLX4_SEND_DOORBELL 0x14 +#define MLX4_CQ_DOORBELL 0x20 + +#if BITS_PER_LONG == 64 +/* + * Assume that we can just write a 64-bit doorbell atomically. s390 + * actually doesn't have writeq() but S/390 systems don't even have + * PCI so we won't worry about it. + */ + +#define MLX4_DECLARE_DOORBELL_LOCK(name) +#define MLX4_INIT_DOORBELL_LOCK(ptr) do { } while (0) +#define MLX4_GET_DOORBELL_LOCK(ptr) (NULL) + +static inline void mlx4_write64(__be32 val[2], void __iomem *dest, + spinlock_t *doorbell_lock) +{ + __raw_writeq(*(u64 *) val, dest); +} + +#else + +/* + * Just fall back to a spinlock to protect the doorbell if + * BITS_PER_LONG is 32 -- there's no portable way to do atomic 64-bit + * MMIO writes. + */ + +#define MLX4_DECLARE_DOORBELL_LOCK(name) spinlock_t name +#define MLX4_INIT_DOORBELL_LOCK(ptr) spin_lock_init(ptr) +#define MLX4_GET_DOORBELL_LOCK(ptr) (ptr) + +static inline void mlx4_write64(__be32 val[2], void __iomem *dest, + spinlock_t *doorbell_lock) +{ + unsigned long flags; + + spin_lock_irqsave(doorbell_lock, &flags); + __raw_writel((__force u32) val[0], dest); + __raw_writel((__force u32) val[1], (u8*)dest + 4); + spin_unlock_irqrestore(doorbell_lock, flags); +} + +#endif + +#endif /* MLX4_DOORBELL_H */ diff --git a/branches/ConnectX/hw/mlx4/kernel/inc/driver.h b/branches/ConnectX/hw/mlx4/kernel/inc/driver.h new file mode 100644 index 00000000..e7962b28 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/inc/driver.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef MLX4_DRIVER_H +#define MLX4_DRIVER_H + +#include "device.h" + +struct mlx4_dev; + +enum mlx4_dev_event { + MLX4_DEV_EVENT_CATASTROPHIC_ERROR, + MLX4_DEV_EVENT_PORT_UP, + MLX4_DEV_EVENT_PORT_DOWN, + MLX4_DEV_EVENT_PORT_REINIT, +}; + +struct mlx4_interface { + void * (*add) (struct mlx4_dev *dev); + void (*remove)(struct mlx4_dev *dev, void *context); + void (*event) (struct mlx4_dev *dev, void *context, + enum mlx4_dev_event event, int subtype, + int port); + struct list_head list; +}; + +int mlx4_register_interface(struct mlx4_interface *intf); +void mlx4_unregister_interface(struct mlx4_interface *intf); + +#endif /* MLX4_DRIVER_H */ diff --git a/branches/ConnectX/hw/mlx4/kernel/inc/ib_cache.h b/branches/ConnectX/hw/mlx4/kernel/inc/ib_cache.h new file mode 100644 index 00000000..85205efd --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/inc/ib_cache.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_cache.h 1349 2004-12-16 21:09:43Z roland $ + */ + +#pragma once + +#include "ib_verbs.h" + +/** + * ib_get_cached_gid - Returns a cached GID table entry + * @device: The device to query. + * @port_num: The port number of the device to query. + * @index: The index into the cached GID table to query. + * @gid: The GID value found at the specified index. + * + * ib_get_cached_gid() fetches the specified GID table entry stored in + * the local software cache. + */ +int ib_get_cached_gid(struct ib_device *device, + u8 port_num, + int index, + union ib_gid *gid); + +/** + * ib_find_cached_gid - Returns the port number and GID table index where + * a specified GID value occurs. + * @device: The device to query. + * @gid: The GID value to search for. + * @port_num: The port number of the device where the GID value was found. + * @index: The index into the cached GID table where the GID was found. This + * parameter may be NULL. + * + * ib_find_cached_gid() searches for the specified GID value in + * the local software cache. 
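+ *
+ * A minimal sketch of the calling convention (names are illustrative;
+ * a zero return is assumed to mean the GID was found):
+ *
+ *	u8 port_num;
+ *	u16 index;
+ *	int err = ib_find_cached_gid(device, &gid, &port_num, &index);
+ *
+ * If err is zero, port_num and index locate the GID in the cache.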
+ */ +int ib_find_cached_gid(struct ib_device *device, + union ib_gid *gid, + u8 *port_num, + u16 *index); + +/** + * ib_get_cached_pkey - Returns a cached PKey table entry + * @device: The device to query. + * @port_num: The port number of the device to query. + * @index: The index into the cached PKey table to query. + * @pkey: The PKey value found at the specified index. + * + * ib_get_cached_pkey() fetches the specified PKey table entry stored in + * the local software cache. + */ +int ib_get_cached_pkey(struct ib_device *device_handle, + u8 port_num, + int index, + u16 *pkey); + +/** + * ib_find_cached_pkey - Returns the PKey table index where a specified + * PKey value occurs. + * @device: The device to query. + * @port_num: The port number of the device to search for the PKey. + * @pkey: The PKey value to search for. + * @index: The index into the cached PKey table where the PKey was found. + * + * ib_find_cached_pkey() searches the specified PKey table in + * the local software cache. + */ +int ib_find_cached_pkey(struct ib_device *device, + u8 port_num, + u16 pkey, + u16 *index); + +/** + * ib_get_cached_lmc - Returns a cached lmc table entry + * @device: The device to query. + * @port_num: The port number of the device to query. + * @lmc: The lmc value for the specified port for that device. + * + * ib_get_cached_lmc() fetches the specified lmc table entry stored in + * the local software cache. + */ +int ib_get_cached_lmc(struct ib_device *device, + u8 port_num, + u8 *lmc); + diff --git a/branches/ConnectX/hw/mlx4/kernel/inc/ib_mad.h b/branches/ConnectX/hw/mlx4/kernel/inc/ib_mad.h new file mode 100644 index 00000000..e28a8f32 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/inc/ib_mad.h @@ -0,0 +1,657 @@ +/* + * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004-2006 Voltaire Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: ib_mad.h 5596 2006-03-03 01:00:07Z sean.hefty $ + */ + +#if !defined( IB_MAD_H ) +#define IB_MAD_H + +#include + +/* Management base version */ +#define IB_MGMT_BASE_VERSION 1 + +/* Management classes */ +#define IB_MGMT_CLASS_SUBN_LID_ROUTED 0x01 +#define IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE 0x81 +#define IB_MGMT_CLASS_SUBN_ADM 0x03 +#define IB_MGMT_CLASS_PERF_MGMT 0x04 +#define IB_MGMT_CLASS_BM 0x05 +#define IB_MGMT_CLASS_DEVICE_MGMT 0x06 +#define IB_MGMT_CLASS_CM 0x07 +#define IB_MGMT_CLASS_SNMP 0x08 +#define IB_MGMT_CLASS_DEVICE_ADM 0x10 +#define IB_MGMT_CLASS_BOOT_MGMT 0x11 +#define IB_MGMT_CLASS_BIS 0x12 +#define IB_MGMT_CLASS_CONG_MGMT 0x21 +#define IB_MGMT_CLASS_VENDOR_RANGE2_START 0x30 +#define IB_MGMT_CLASS_VENDOR_RANGE2_END 0x4F + +#define IB_OPENIB_OUI (0x001405) + +/* Management methods */ +#define IB_MGMT_METHOD_GET 0x01 +#define IB_MGMT_METHOD_SET 0x02 +#define IB_MGMT_METHOD_GET_RESP 0x81 +#define IB_MGMT_METHOD_SEND 0x03 +#define IB_MGMT_METHOD_TRAP 0x05 +#define IB_MGMT_METHOD_REPORT 0x06 +#define IB_MGMT_METHOD_REPORT_RESP 0x86 +#define IB_MGMT_METHOD_TRAP_REPRESS 0x07 + +#define IB_MGMT_METHOD_RESP 0x80 +#define IB_BM_ATTR_MOD_RESP cpu_to_be32(1) + +#define IB_MGMT_MAX_METHODS 128 + +/* RMPP information */ +#define IB_MGMT_RMPP_VERSION 1 + +#define IB_MGMT_RMPP_TYPE_DATA 1 +#define IB_MGMT_RMPP_TYPE_ACK 2 +#define IB_MGMT_RMPP_TYPE_STOP 3 +#define IB_MGMT_RMPP_TYPE_ABORT 4 + +#define IB_MGMT_RMPP_FLAG_ACTIVE 1 +#define IB_MGMT_RMPP_FLAG_FIRST (1<<1) +#define IB_MGMT_RMPP_FLAG_LAST (1<<2) + +#define IB_MGMT_RMPP_NO_RESPTIME 0x1F + +#define IB_MGMT_RMPP_STATUS_SUCCESS 0 +#define IB_MGMT_RMPP_STATUS_RESX 1 +#define IB_MGMT_RMPP_STATUS_ABORT_MIN 118 +#define IB_MGMT_RMPP_STATUS_T2L 118 +#define IB_MGMT_RMPP_STATUS_BAD_LEN 119 +#define IB_MGMT_RMPP_STATUS_BAD_SEG 120 +#define IB_MGMT_RMPP_STATUS_BADT 121 +#define IB_MGMT_RMPP_STATUS_W2S 122 +#define IB_MGMT_RMPP_STATUS_S2B 123 +#define IB_MGMT_RMPP_STATUS_BAD_STATUS 124 +#define IB_MGMT_RMPP_STATUS_UNV 125 +#define IB_MGMT_RMPP_STATUS_TMR 126 +#define IB_MGMT_RMPP_STATUS_UNSPEC 127 +#define IB_MGMT_RMPP_STATUS_ABORT_MAX 127 + +#define IB_QP0 0 +#define IB_QP1_QKEY 0x80010000 +#define IB_QP_SET_QKEY 0x80000000 + +#define IB_DEFAULT_PKEY_PARTIAL 0x7FFF +#define IB_DEFAULT_PKEY_FULL 0xFFFF + +enum { + IB_MGMT_MAD_HDR = 24, + IB_MGMT_MAD_DATA = 232, + IB_MGMT_RMPP_HDR = 36, + IB_MGMT_RMPP_DATA = 220, + IB_MGMT_VENDOR_HDR = 40, + IB_MGMT_VENDOR_DATA = 216, + IB_MGMT_SA_HDR = 56, + IB_MGMT_SA_DATA = 200, + IB_MGMT_DEVICE_HDR = 64, + IB_MGMT_DEVICE_DATA = 192, +}; + +struct ib_mad_hdr { + u8 base_version; + u8 mgmt_class; + u8 class_version; + u8 method; + __be16 status; + __be16 class_specific; + __be64 tid; + __be16 attr_id; + __be16 resv; + __be32 attr_mod; +}; + +struct ib_rmpp_hdr { + u8 rmpp_version; + u8 rmpp_type; + u8 rmpp_rtime_flags; + u8 rmpp_status; + __be32 seg_num; + __be32 paylen_newwin; +}; + +typedef u64 __bitwise ib_sa_comp_mask; + +#define IB_SA_COMP_MASK(n) ((__force ib_sa_comp_mask) cpu_to_be64(1ull << n)) + +/* + * ib_sa_hdr and ib_sa_mad structures must be packed because they have + * 64-bit fields that are only 32-bit aligned. 64-bit architectures will + * lay them out wrong otherwise. 
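+ * An illustrative compile-time check of the packed layout, placed
+ * after the definition below (C_ASSERT is the WDK compile-time
+ * assertion; 20 = 8 + 2 + 2 + 8 bytes):
+ *	C_ASSERT(sizeof(struct ib_sa_hdr) == 20);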
(And unfortunately they are sent on + * the wire so we can't change the layout) + */ +#pragma pack(push,1) +struct ib_sa_hdr { + __be64 sm_key; + __be16 attr_offset; + __be16 reserved; + ib_sa_comp_mask comp_mask; +} __attribute__ ((packed)); +#pragma pack(pop) + +struct ib_mad { + struct ib_mad_hdr mad_hdr; + u8 data[IB_MGMT_MAD_DATA]; +}; + +struct ib_rmpp_mad { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + u8 data[IB_MGMT_RMPP_DATA]; +}; + +#pragma pack(push,1) +struct ib_sa_mad { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + struct ib_sa_hdr sa_hdr; + u8 data[IB_MGMT_SA_DATA]; +} __attribute__ ((packed)); +#pragma pack(pop) + +struct ib_vendor_mad { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + u8 reserved; + u8 oui[3]; + u8 data[IB_MGMT_VENDOR_DATA]; +}; + +struct ib_class_port_info +{ + u8 base_version; + u8 class_version; + __be16 capability_mask; + u8 reserved[3]; + u8 resp_time_value; + u8 redirect_gid[16]; + __be32 redirect_tcslfl; + __be16 redirect_lid; + __be16 redirect_pkey; + __be32 redirect_qp; + __be32 redirect_qkey; + u8 trap_gid[16]; + __be32 trap_tcslfl; + __be16 trap_lid; + __be16 trap_pkey; + __be32 trap_hlqp; + __be32 trap_qkey; +}; + +/** + * ib_mad_send_buf - MAD data buffer and work request for sends. + * @next: A pointer used to chain together MADs for posting. + * @mad: References an allocated MAD data buffer for MADs that do not have + * RMPP active. For MADs using RMPP, references the common and management + * class specific headers. + * @mad_agent: MAD agent that allocated the buffer. + * @ah: The address handle to use when sending the MAD. + * @context: User-controlled context fields. + * @hdr_len: Indicates the size of the data header of the MAD. This length + * includes the common MAD, RMPP, and class specific headers. + * @data_len: Indicates the total size of user-transferred data. + * @seg_count: The number of RMPP segments allocated for this send. + * @seg_size: Size of each RMPP segment. + * @timeout_ms: Time to wait for a response. + * @retries: Number of times to retry a request for a response. + * + * Users are responsible for initializing the MAD buffer itself, with the + * exception of any RMPP header. Additional segment buffer space allocated + * beyond data_len is padding. + */ +struct ib_mad_send_buf { + struct ib_mad_send_buf *next; + void *mad; + struct ib_mad_agent *mad_agent; + struct ib_ah *ah; + void *context[2]; + int hdr_len; + int data_len; + int seg_count; + int seg_size; + int timeout_ms; + int retries; +}; + +/** + * ib_response_mad - Returns if the specified MAD has been generated in + * response to a sent request or trap. + */ +int ib_response_mad(struct ib_mad *mad); + +/** + * ib_get_rmpp_resptime - Returns the RMPP response time. + * @rmpp_hdr: An RMPP header. + */ +static inline u8 ib_get_rmpp_resptime(struct ib_rmpp_hdr *rmpp_hdr) +{ + return rmpp_hdr->rmpp_rtime_flags >> 3; +} + +/** + * ib_get_rmpp_flags - Returns the RMPP flags. + * @rmpp_hdr: An RMPP header. + */ +static inline u8 ib_get_rmpp_flags(struct ib_rmpp_hdr *rmpp_hdr) +{ + return rmpp_hdr->rmpp_rtime_flags & 0x7; +} + +/** + * ib_set_rmpp_resptime - Sets the response time in an RMPP header. + * @rmpp_hdr: An RMPP header. + * @rtime: The response time to set. + */ +static inline void ib_set_rmpp_resptime(struct ib_rmpp_hdr *rmpp_hdr, u8 rtime) +{ + rmpp_hdr->rmpp_rtime_flags = ib_get_rmpp_flags(rmpp_hdr) | (rtime << 3); +} + +/** + * ib_set_rmpp_flags - Sets the flags in an RMPP header. 
+ * @rmpp_hdr: An RMPP header.
+ * @flags: The flags to set.
+ */
+static inline void ib_set_rmpp_flags(struct ib_rmpp_hdr *rmpp_hdr, u8 flags)
+{
+	/* keep the response time (bits 7:3), replace only the flags (bits 2:0) */
+	rmpp_hdr->rmpp_rtime_flags = (rmpp_hdr->rmpp_rtime_flags & 0xF8) |
+				     (flags & 0x7);
+}
+
+struct ib_mad_agent;
+struct ib_mad_send_wc;
+struct ib_mad_recv_wc;
+
+/**
+ * ib_mad_send_handler - callback handler for a sent MAD.
+ * @mad_agent: MAD agent that sent the MAD.
+ * @mad_send_wc: Send work completion information on the sent MAD.
+ */
+typedef void (*ib_mad_send_handler)(struct ib_mad_agent *mad_agent,
+				    struct ib_mad_send_wc *mad_send_wc);
+
+/**
+ * ib_mad_snoop_handler - Callback handler for snooping sent MADs.
+ * @mad_agent: MAD agent that snooped the MAD.
+ * @send_buf: Work request information on the sent MAD.
+ * @mad_send_wc: Work completion information on the sent MAD.  Valid
+ *   only for snooping that occurs on a send completion.
+ *
+ * Clients snooping MADs should not modify data referenced by the @send_buf
+ * or @mad_send_wc.
+ */
+typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent,
+				     struct ib_mad_send_buf *send_buf,
+				     struct ib_mad_send_wc *mad_send_wc);
+
+/**
+ * ib_mad_recv_handler - callback handler for a received MAD.
+ * @mad_agent: MAD agent requesting the received MAD.
+ * @mad_recv_wc: Received work completion information on the received MAD.
+ *
+ * MADs received in response to a send request operation will be handed to
+ * the user before the send operation completes.  All data buffers given
+ * to registered agents through this routine are owned by the receiving
+ * client, except for snooping agents.  Clients snooping MADs should not
+ * modify the data referenced by @mad_recv_wc.
+ */
+typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent,
+				    struct ib_mad_recv_wc *mad_recv_wc);
+
+/**
+ * ib_mad_agent - Used to track MAD registration with the access layer.
+ * @device: Reference to device registration is on.
+ * @qp: Reference to QP used for sending and receiving MADs.
+ * @mr: Memory region for system memory usable for DMA.
+ * @recv_handler: Callback handler for a received MAD.
+ * @send_handler: Callback handler for a sent MAD.
+ * @snoop_handler: Callback handler for snooped sent MADs.
+ * @context: User-specified context associated with this registration.
+ * @hi_tid: Access layer assigned transaction ID for this client.
+ *   Unsolicited MADs sent by this client will have the upper 32-bits
+ *   of their TID set to this value.
+ * @port_num: Port number on which the QP is registered.
+ * @rmpp_version: If set, indicates the RMPP version used by this agent.
+ */
+struct ib_mad_agent {
+	struct ib_device	*device;
+	struct ib_qp		*qp;
+	struct ib_mr		*mr;
+	ib_mad_recv_handler	recv_handler;
+	ib_mad_send_handler	send_handler;
+	ib_mad_snoop_handler	snoop_handler;
+	void			*context;
+	u32			hi_tid;
+	u8			port_num;
+	u8			rmpp_version;
+};
+
+/**
+ * ib_mad_send_wc - MAD send completion information.
+ * @send_buf: Send MAD data buffer associated with the send MAD request.
+ * @status: Completion status.
+ * @vendor_err: Optional vendor error information returned with a failed
+ *   request.
+ */
+struct ib_mad_send_wc {
+	struct ib_mad_send_buf	*send_buf;
+	enum ib_wc_status	status;
+	u32			vendor_err;
+};
+
+/**
+ * ib_mad_recv_buf - received MAD buffer information.
+ * @list: Reference to next data buffer for a received RMPP MAD.
+ * @grh: References a data buffer containing the global route header.
+ *   The data referenced by this buffer is only valid if the GRH is
+ *   valid.
+ * @mad: References the start of the received MAD.
+ */
+struct ib_mad_recv_buf {
+	struct list_head	list;
+	struct ib_grh		*grh;
+	struct ib_mad		*mad;
+};
+
+/**
+ * ib_mad_recv_wc - received MAD information.
+ * @wc: Completion information for the received data.
+ * @recv_buf: Specifies the location of the received data buffer(s).
+ * @rmpp_list: Specifies a list of RMPP reassembled received MAD buffers.
+ * @mad_len: The length of the received MAD, without duplicated headers.
+ *
+ * For a received response, the wr_id contains a pointer to the
+ * ib_mad_send_buf for the corresponding send request.
+ */
+struct ib_mad_recv_wc {
+	ib_wc_t			*wc;
+	struct ib_mad_recv_buf	recv_buf;
+	struct list_head	rmpp_list;
+	int			mad_len;
+};
+
+/**
+ * ib_mad_reg_req - MAD registration request
+ * @mgmt_class: Indicates which management class of MADs should be received
+ *   by the caller.  This field is only required if the user wishes to
+ *   receive unsolicited MADs, otherwise it should be 0.
+ * @mgmt_class_version: Indicates which version of MADs for the given
+ *   management class to receive.
+ * @oui: Indicates IEEE OUI when mgmt_class is a vendor class
+ *   in the range from 0x30 to 0x4f. Otherwise not used.
+ * @method_mask: The caller will receive unsolicited MADs for any method
+ *   where @method_mask = 1.
+ */
+struct ib_mad_reg_req {
+	u8	mgmt_class;
+	u8	mgmt_class_version;
+	u8	oui[3];
+	DECLARE_BITMAP(method_mask, IB_MGMT_MAX_METHODS);
+};
+
+/**
+ * ib_register_mad_agent - Register to send/receive MADs.
+ * @device: The device to register with.
+ * @port_num: The port on the specified device to use.
+ * @qp_type: Specifies which QP to access.  Must be either
+ *   IB_QPT_SMI or IB_QPT_GSI.
+ * @mad_reg_req: Specifies which unsolicited MADs should be received
+ *   by the caller.  This parameter may be NULL if the caller only
+ *   wishes to receive solicited responses.
+ * @rmpp_version: If set, indicates that the client will send
+ *   and receive MADs that contain the RMPP header for the given version.
+ *   If set to 0, indicates that RMPP is not used by this client.
+ * @send_handler: The completion callback routine invoked after a send
+ *   request has completed.
+ * @recv_handler: The completion callback routine invoked for a received
+ *   MAD.
+ * @context: User specified context associated with the registration.
+ */
+struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
+					   u8 port_num,
+					   enum ib_qp_type qp_type,
+					   struct ib_mad_reg_req *mad_reg_req,
+					   u8 rmpp_version,
+					   ib_mad_send_handler send_handler,
+					   ib_mad_recv_handler recv_handler,
+					   void *context);
+
+enum ib_mad_snoop_flags {
+	/*IB_MAD_SNOOP_POSTED_SENDS	   = 1,*/
+	/*IB_MAD_SNOOP_RMPP_SENDS	   = (1<<1),*/
+	IB_MAD_SNOOP_SEND_COMPLETIONS	   = (1<<2),
+	/*IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS = (1<<3),*/
+	IB_MAD_SNOOP_RECVS		   = (1<<4)
+	/*IB_MAD_SNOOP_RMPP_RECVS	   = (1<<5),*/
+	/*IB_MAD_SNOOP_REDIRECTED_QPS	   = (1<<6)*/
+};
+
+/**
+ * ib_register_mad_snoop - Register to snoop sent and received MADs.
+ * @device: The device to register with.
+ * @port_num: The port on the specified device to use.
+ * @qp_type: Specifies which QP traffic to snoop.  Must be either
+ *   IB_QPT_SMI or IB_QPT_GSI.
+ * @mad_snoop_flags: Specifies where snooping occurs.
+ * @send_handler: The callback routine invoked for a snooped send.
+ * @recv_handler: The callback routine invoked for a snooped receive.
+ * @context: User specified context associated with the registration.
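+ *
+ * A minimal usage sketch: the my_snoop_send and my_snoop_recv callbacks are
+ * illustrative names only (not part of this header), and, as an assumption,
+ * the return value is checked with the Linux-style IS_ERR()/PTR_ERR()
+ * convention used elsewhere in this port:
+ *
+ *	struct ib_mad_agent *snoop_agent;
+ *
+ *	snoop_agent = ib_register_mad_snoop(device, 1, IB_QPT_GSI,
+ *					    IB_MAD_SNOOP_SEND_COMPLETIONS |
+ *					    IB_MAD_SNOOP_RECVS,
+ *					    my_snoop_send, my_snoop_recv,
+ *					    NULL);
+ *	if (IS_ERR(snoop_agent))
+ *		return PTR_ERR(snoop_agent);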
+ */ +struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device, + u8 port_num, + enum ib_qp_type qp_type, + int mad_snoop_flags, + ib_mad_snoop_handler snoop_handler, + ib_mad_recv_handler recv_handler, + void *context); + +/** + * ib_unregister_mad_agent - Unregisters a client from using MAD services. + * @mad_agent: Corresponding MAD registration request to deregister. + * + * After invoking this routine, MAD services are no longer usable by the + * client on the associated QP. + */ +int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent); + +/** + * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated + * with the registered client. + * @send_buf: Specifies the information needed to send the MAD(s). + * @bad_send_buf: Specifies the MAD on which an error was encountered. This + * parameter is optional if only a single MAD is posted. + * + * Sent MADs are not guaranteed to complete in the order that they were posted. + * + * If the MAD requires RMPP, the data buffer should contain a single copy + * of the common MAD, RMPP, and class specific headers, followed by the class + * defined data. If the class defined data would not divide evenly into + * RMPP segments, then space must be allocated at the end of the referenced + * buffer for any required padding. To indicate the amount of class defined + * data being transferred, the paylen_newwin field in the RMPP header should + * be set to the size of the class specific header plus the amount of class + * defined data being transferred. The paylen_newwin field should be + * specified in network-byte order. + */ +int ib_post_send_mad(struct ib_mad_send_buf *send_buf, + struct ib_mad_send_buf **bad_send_buf); + + +/** + * ib_free_recv_mad - Returns data buffers used to receive a MAD. + * @mad_recv_wc: Work completion information for a received MAD. + * + * Clients receiving MADs through their ib_mad_recv_handler must call this + * routine to return the work completion buffers to the access layer. + */ +void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc); + +/** + * ib_cancel_mad - Cancels an outstanding send MAD operation. + * @mad_agent: Specifies the registration associated with sent MAD. + * @send_buf: Indicates the MAD to cancel. + * + * MADs will be returned to the user through the corresponding + * ib_mad_send_handler. + */ +void ib_cancel_mad(struct ib_mad_agent *mad_agent, + struct ib_mad_send_buf *send_buf); + +/** + * ib_modify_mad - Modifies an outstanding send MAD operation. + * @mad_agent: Specifies the registration associated with sent MAD. + * @send_buf: Indicates the MAD to modify. + * @timeout_ms: New timeout value for sent MAD. + * + * This call will reset the timeout value for a sent MAD to the specified + * value. + */ +int ib_modify_mad(struct ib_mad_agent *mad_agent, + struct ib_mad_send_buf *send_buf, u32 timeout_ms); + +/** + * ib_redirect_mad_qp - Registers a QP for MAD services. + * @qp: Reference to a QP that requires MAD services. + * @rmpp_version: If set, indicates that the client will send + * and receive MADs that contain the RMPP header for the given version. + * If set to 0, indicates that RMPP is not used by this client. + * @send_handler: The completion callback routine invoked after a send + * request has completed. + * @recv_handler: The completion callback routine invoked for a received + * MAD. + * @context: User specified context associated with the registration. + * + * Use of this call allows clients to use MAD services, such as RMPP, + * on user-owned QPs. 
After calling this routine, users may send + * MADs on the specified QP by calling ib_mad_post_send. + */ +struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp, + u8 rmpp_version, + ib_mad_send_handler send_handler, + ib_mad_recv_handler recv_handler, + void *context); + +/** + * ib_process_mad_wc - Processes a work completion associated with a + * MAD sent or received on a redirected QP. + * @mad_agent: Specifies the registered MAD service using the redirected QP. + * @wc: References a work completion associated with a sent or received + * MAD segment. + * + * This routine is used to complete or continue processing on a MAD request. + * If the work completion is associated with a send operation, calling + * this routine is required to continue an RMPP transfer or to wait for a + * corresponding response, if it is a request. If the work completion is + * associated with a receive operation, calling this routine is required to + * process an inbound or outbound RMPP transfer, or to match a response MAD + * with its corresponding request. + */ +int ib_process_mad_wc(struct ib_mad_agent *mad_agent, + ib_wc_t *wc); + +/** + * ib_create_send_mad - Allocate and initialize a data buffer and work request + * for sending a MAD. + * @mad_agent: Specifies the registered MAD service to associate with the MAD. + * @remote_qpn: Specifies the QPN of the receiving node. + * @pkey_index: Specifies which PKey the MAD will be sent using. This field + * is valid only if the remote_qpn is QP 1. + * @rmpp_active: Indicates if the send will enable RMPP. + * @hdr_len: Indicates the size of the data header of the MAD. This length + * should include the common MAD header, RMPP header, plus any class + * specific header. + * @data_len: Indicates the size of any user-transferred data. The call will + * automatically adjust the allocated buffer size to account for any + * additional padding that may be necessary. + * @gfp_mask: GFP mask used for the memory allocation. + * + * This routine allocates a MAD for sending. The returned MAD send buffer + * will reference a data buffer usable for sending a MAD, along + * with an initialized work request structure. Users may modify the returned + * MAD data buffer before posting the send. + * + * The returned MAD header, class specific headers, and any padding will be + * cleared. Users are responsible for initializing the common MAD header, + * any class specific header, and MAD data area. + * If @rmpp_active is set, the RMPP header will be initialized for sending. + */ +struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, + u32 remote_qpn, u16 pkey_index, + int rmpp_active, + int hdr_len, int data_len, + gfp_t gfp_mask); + +/** + * ib_is_mad_class_rmpp - returns whether given management class + * supports RMPP. + * @mgmt_class: management class + * + * This routine returns whether the management class supports RMPP. + */ +int ib_is_mad_class_rmpp(u8 mgmt_class); + +/** + * ib_get_mad_data_offset - returns the data offset for a given + * management class. + * @mgmt_class: management class + * + * This routine returns the data offset in the MAD for the management + * class requested. + */ +int ib_get_mad_data_offset(u8 mgmt_class); + +/** + * ib_get_rmpp_segment - returns the data buffer for a given RMPP segment. + * @send_buf: Previously allocated send data buffer. + * @seg_num: number of segment to return + * + * This routine returns a pointer to the data buffer of an RMPP MAD. + * Users must provide synchronization to @send_buf around this call. 
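+ *
+ * Illustrative sketch only: copy a class payload into each allocated
+ * segment.  Segment numbers are assumed to start at 1 (matching the RMPP
+ * seg_num field), "payload" is a hypothetical u8 pointer supplied by the
+ * caller, and the trailing-segment padding described for ib_post_send_mad
+ * is glossed over here:
+ *
+ *	int i;
+ *	u8 *seg;
+ *
+ *	for (i = 1; i <= send_buf->seg_count; i++) {
+ *		seg = ib_get_rmpp_segment(send_buf, i);
+ *		memcpy(seg, payload + (i - 1) * send_buf->seg_size,
+ *		       send_buf->seg_size);
+ *	}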
+ */ +void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num); + +/** + * ib_free_send_mad - Returns data buffers used to send a MAD. + * @send_buf: Previously allocated send data buffer. + */ +void ib_free_send_mad(struct ib_mad_send_buf *send_buf); + +#endif /* IB_MAD_H */ diff --git a/branches/ConnectX/hw/mlx4/kernel/inc/ib_pack.h b/branches/ConnectX/hw/mlx4/kernel/inc/ib_pack.h new file mode 100644 index 00000000..ac7283d5 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/inc/ib_pack.h @@ -0,0 +1,245 @@ +/* + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_pack.h 1349 2004-12-16 21:09:43Z roland $ + */ + +#ifndef IB_PACK_H +#define IB_PACK_H + +#include "ib_verbs.h" + +enum { + IB_LRH_BYTES = 8, + IB_GRH_BYTES = 40, + IB_BTH_BYTES = 12, + IB_DETH_BYTES = 8 +}; + +struct ib_field { + size_t struct_offset_bytes; + size_t struct_size_bytes; + int offset_words; + int offset_bits; + int size_bits; + char *field_name; +}; + +#define RESERVED \ + .field_name = "reserved" + +/* + * This macro cleans up the definitions of constants for BTH opcodes. + * It is used to define constants such as IB_OPCODE_UD_SEND_ONLY, + * which becomes IB_OPCODE_UD + IB_OPCODE_SEND_ONLY, and this gives + * the correct value. + * + * In short, user code should use the constants defined using the + * macro rather than worrying about adding together other constants. 
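+ *
+ * For example, IB_OPCODE(UD, SEND_ONLY) expands to
+ *
+ *	IB_OPCODE_UD_SEND_ONLY = IB_OPCODE_UD + IB_OPCODE_SEND_ONLY
+ *
+ * which, with the transport and operation constants defined below
+ * (0x60 and 0x04), yields the on-the-wire BTH opcode 0x64.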
+*/ +#define IB_OPCODE(transport, op) \ + IB_OPCODE_ ## transport ## _ ## op = \ + IB_OPCODE_ ## transport + IB_OPCODE_ ## op + +enum { + /* transport types -- just used to define real constants */ + IB_OPCODE_RC = 0x00, + IB_OPCODE_UC = 0x20, + IB_OPCODE_RD = 0x40, + IB_OPCODE_UD = 0x60, + + /* operations -- just used to define real constants */ + IB_OPCODE_SEND_FIRST = 0x00, + IB_OPCODE_SEND_MIDDLE = 0x01, + IB_OPCODE_SEND_LAST = 0x02, + IB_OPCODE_SEND_LAST_WITH_IMMEDIATE = 0x03, + IB_OPCODE_SEND_ONLY = 0x04, + IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE = 0x05, + IB_OPCODE_RDMA_WRITE_FIRST = 0x06, + IB_OPCODE_RDMA_WRITE_MIDDLE = 0x07, + IB_OPCODE_RDMA_WRITE_LAST = 0x08, + IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE = 0x09, + IB_OPCODE_RDMA_WRITE_ONLY = 0x0a, + IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE = 0x0b, + IB_OPCODE_RDMA_READ_REQUEST = 0x0c, + IB_OPCODE_RDMA_READ_RESPONSE_FIRST = 0x0d, + IB_OPCODE_RDMA_READ_RESPONSE_MIDDLE = 0x0e, + IB_OPCODE_RDMA_READ_RESPONSE_LAST = 0x0f, + IB_OPCODE_RDMA_READ_RESPONSE_ONLY = 0x10, + IB_OPCODE_ACKNOWLEDGE = 0x11, + IB_OPCODE_ATOMIC_ACKNOWLEDGE = 0x12, + IB_OPCODE_COMPARE_SWAP = 0x13, + IB_OPCODE_FETCH_ADD = 0x14, + + /* real constants follow -- see comment about above IB_OPCODE() + macro for more details */ + + /* RC */ + IB_OPCODE(RC, SEND_FIRST), + IB_OPCODE(RC, SEND_MIDDLE), + IB_OPCODE(RC, SEND_LAST), + IB_OPCODE(RC, SEND_LAST_WITH_IMMEDIATE), + IB_OPCODE(RC, SEND_ONLY), + IB_OPCODE(RC, SEND_ONLY_WITH_IMMEDIATE), + IB_OPCODE(RC, RDMA_WRITE_FIRST), + IB_OPCODE(RC, RDMA_WRITE_MIDDLE), + IB_OPCODE(RC, RDMA_WRITE_LAST), + IB_OPCODE(RC, RDMA_WRITE_LAST_WITH_IMMEDIATE), + IB_OPCODE(RC, RDMA_WRITE_ONLY), + IB_OPCODE(RC, RDMA_WRITE_ONLY_WITH_IMMEDIATE), + IB_OPCODE(RC, RDMA_READ_REQUEST), + IB_OPCODE(RC, RDMA_READ_RESPONSE_FIRST), + IB_OPCODE(RC, RDMA_READ_RESPONSE_MIDDLE), + IB_OPCODE(RC, RDMA_READ_RESPONSE_LAST), + IB_OPCODE(RC, RDMA_READ_RESPONSE_ONLY), + IB_OPCODE(RC, ACKNOWLEDGE), + IB_OPCODE(RC, ATOMIC_ACKNOWLEDGE), + IB_OPCODE(RC, COMPARE_SWAP), + IB_OPCODE(RC, FETCH_ADD), + + /* UC */ + IB_OPCODE(UC, SEND_FIRST), + IB_OPCODE(UC, SEND_MIDDLE), + IB_OPCODE(UC, SEND_LAST), + IB_OPCODE(UC, SEND_LAST_WITH_IMMEDIATE), + IB_OPCODE(UC, SEND_ONLY), + IB_OPCODE(UC, SEND_ONLY_WITH_IMMEDIATE), + IB_OPCODE(UC, RDMA_WRITE_FIRST), + IB_OPCODE(UC, RDMA_WRITE_MIDDLE), + IB_OPCODE(UC, RDMA_WRITE_LAST), + IB_OPCODE(UC, RDMA_WRITE_LAST_WITH_IMMEDIATE), + IB_OPCODE(UC, RDMA_WRITE_ONLY), + IB_OPCODE(UC, RDMA_WRITE_ONLY_WITH_IMMEDIATE), + + /* RD */ + IB_OPCODE(RD, SEND_FIRST), + IB_OPCODE(RD, SEND_MIDDLE), + IB_OPCODE(RD, SEND_LAST), + IB_OPCODE(RD, SEND_LAST_WITH_IMMEDIATE), + IB_OPCODE(RD, SEND_ONLY), + IB_OPCODE(RD, SEND_ONLY_WITH_IMMEDIATE), + IB_OPCODE(RD, RDMA_WRITE_FIRST), + IB_OPCODE(RD, RDMA_WRITE_MIDDLE), + IB_OPCODE(RD, RDMA_WRITE_LAST), + IB_OPCODE(RD, RDMA_WRITE_LAST_WITH_IMMEDIATE), + IB_OPCODE(RD, RDMA_WRITE_ONLY), + IB_OPCODE(RD, RDMA_WRITE_ONLY_WITH_IMMEDIATE), + IB_OPCODE(RD, RDMA_READ_REQUEST), + IB_OPCODE(RD, RDMA_READ_RESPONSE_FIRST), + IB_OPCODE(RD, RDMA_READ_RESPONSE_MIDDLE), + IB_OPCODE(RD, RDMA_READ_RESPONSE_LAST), + IB_OPCODE(RD, RDMA_READ_RESPONSE_ONLY), + IB_OPCODE(RD, ACKNOWLEDGE), + IB_OPCODE(RD, ATOMIC_ACKNOWLEDGE), + IB_OPCODE(RD, COMPARE_SWAP), + IB_OPCODE(RD, FETCH_ADD), + + /* UD */ + IB_OPCODE(UD, SEND_ONLY), + IB_OPCODE(UD, SEND_ONLY_WITH_IMMEDIATE) +}; + +enum { + IB_LNH_RAW = 0, + IB_LNH_IP = 1, + IB_LNH_IBA_LOCAL = 2, + IB_LNH_IBA_GLOBAL = 3 +}; + +struct ib_unpacked_lrh { + u8 virtual_lane; + u8 link_version; + u8 service_level; + u8 
link_next_header; + __be16 destination_lid; + __be16 packet_length; + __be16 source_lid; +}; + +struct ib_unpacked_grh { + u8 ip_version; + u8 traffic_class; + __be32 flow_label; + __be16 payload_length; + u8 next_header; + u8 hop_limit; + union ib_gid source_gid; + union ib_gid destination_gid; +}; + +struct ib_unpacked_bth { + u8 opcode; + u8 solicited_event; + u8 mig_req; + u8 pad_count; + u8 transport_header_version; + __be16 pkey; + __be32 destination_qpn; + u8 ack_req; + __be32 psn; +}; + +struct ib_unpacked_deth { + __be32 qkey; + __be32 source_qpn; +}; + +struct ib_ud_header { + struct ib_unpacked_lrh lrh; + int grh_present; + struct ib_unpacked_grh grh; + struct ib_unpacked_bth bth; + struct ib_unpacked_deth deth; + int immediate_present; + __be32 immediate_data; +}; + +void ib_pack(const struct ib_field *desc, + int desc_len, + void *structure, + u8 *buf); + +void ib_unpack(const struct ib_field *desc, + int desc_len, + void *buf, + void *structure); + +void ib_ud_header_init(int payload_bytes, + int grh_present, + struct ib_ud_header *header); + +int ib_ud_header_pack(struct ib_ud_header *header, + void *buf); + +int ib_ud_header_unpack(void *buf, + struct ib_ud_header *header); + +#endif /* IB_PACK_H */ diff --git a/branches/ConnectX/hw/mlx4/kernel/inc/ib_smi.h b/branches/ConnectX/hw/mlx4/kernel/inc/ib_smi.h new file mode 100644 index 00000000..1f80dab5 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/inc/ib_smi.h @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: ib_smi.h 1389 2004-12-27 22:56:47Z roland $ + */ + +#if !defined( IB_SMI_H ) +#define IB_SMI_H + +#include + +#define IB_SMP_DATA_SIZE 64 +#define IB_SMP_MAX_PATH_HOPS 64 + +#pragma pack(push,1) +struct ib_smp { + u8 base_version; + u8 mgmt_class; + u8 class_version; + u8 method; + __be16 status; + u8 hop_ptr; + u8 hop_cnt; + __be64 tid; + __be16 attr_id; + __be16 resv; + __be32 attr_mod; + __be64 mkey; + __be16 dr_slid; + __be16 dr_dlid; + u8 reserved[28]; + u8 data[IB_SMP_DATA_SIZE]; + u8 initial_path[IB_SMP_MAX_PATH_HOPS]; + u8 return_path[IB_SMP_MAX_PATH_HOPS]; +} __attribute__ ((packed)); +#pragma pack(pop) + + +/* Subnet management attributes */ +#define IB_SMP_ATTR_NOTICE __constant_htons(0x0002) +#define IB_SMP_ATTR_NODE_DESC __constant_htons(0x0010) +#define IB_SMP_ATTR_NODE_INFO __constant_htons(0x0011) +#define IB_SMP_ATTR_SWITCH_INFO __constant_htons(0x0012) +#define IB_SMP_ATTR_GUID_INFO __constant_htons(0x0014) +#define IB_SMP_ATTR_PORT_INFO __constant_htons(0x0015) +#define IB_SMP_ATTR_PKEY_TABLE __constant_htons(0x0016) +#define IB_SMP_ATTR_SL_TO_VL_TABLE __constant_htons(0x0017) +#define IB_SMP_ATTR_VL_ARB_TABLE __constant_htons(0x0018) +#define IB_SMP_ATTR_LINEAR_FORWARD_TABLE __constant_htons(0x0019) +#define IB_SMP_ATTR_RANDOM_FORWARD_TABLE __constant_htons(0x001A) +#define IB_SMP_ATTR_MCAST_FORWARD_TABLE __constant_htons(0x001B) +#define IB_SMP_ATTR_SM_INFO __constant_htons(0x0020) +#define IB_SMP_ATTR_VENDOR_DIAG __constant_htons(0x0030) +#define IB_SMP_ATTR_LED_INFO __constant_htons(0x0031) +#define IB_SMP_ATTR_VENDOR_MASK __constant_htons(0xFF00) + +struct ib_port_info { + __be64 mkey; + __be64 gid_prefix; + __be16 lid; + __be16 sm_lid; + __be32 cap_mask; + __be16 diag_code; + __be16 mkey_lease_period; + u8 local_port_num; + u8 link_width_enabled; + u8 link_width_supported; + u8 link_width_active; + u8 linkspeed_portstate; /* 4 bits, 4 bits */ + u8 portphysstate_linkdown; /* 4 bits, 4 bits */ + u8 mkeyprot_resv_lmc; /* 2 bits, 3, 3 */ + u8 linkspeedactive_enabled; /* 4 bits, 4 bits */ + u8 neighbormtu_mastersmsl; /* 4 bits, 4 bits */ + u8 vlcap_inittype; /* 4 bits, 4 bits */ + u8 vl_high_limit; + u8 vl_arb_high_cap; + u8 vl_arb_low_cap; + u8 inittypereply_mtucap; /* 4 bits, 4 bits */ + u8 vlstallcnt_hoqlife; /* 3 bits, 5 bits */ + u8 operationalvl_pei_peo_fpi_fpo; /* 4 bits, 1, 1, 1, 1 */ + __be16 mkey_violations; + __be16 pkey_violations; + __be16 qkey_violations; + u8 guid_cap; + u8 clientrereg_resv_subnetto; /* 1 bit, 2 bits, 5 */ + u8 resv_resptimevalue; /* 3 bits, 5 bits */ + u8 localphyerrors_overrunerrors; /* 4 bits, 4 bits */ + __be16 max_credit_hint; + u8 resv; + u8 link_roundtrip_latency[3]; +}; + +static inline u8 +ib_get_smp_direction(struct ib_smp *smp) +{ + return (u8)((smp->status & IB_SMP_DIRECTION) == IB_SMP_DIRECTION); +} + +#endif /* IB_SMI_H */ diff --git a/branches/ConnectX/hw/mlx4/kernel/inc/ib_verbs.h b/branches/ConnectX/hw/mlx4/kernel/inc/ib_verbs.h new file mode 100644 index 00000000..93ce17ac --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/inc/ib_verbs.h @@ -0,0 +1,1845 @@ +/* + * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005, 2006, 2007 Cisco Systems. 
All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_verbs.h 1349 2004-12-16 21:09:43Z roland $ + */ + +#if !defined(IB_VERBS_H) +#define IB_VERBS_H + +#include + +union ib_gid { + u8 raw[16]; + struct { + __be64 subnet_prefix; + __be64 interface_id; + } global; +}; + +#include "ib_verbs_ex.h" + +enum rdma_node_type { + /* IB values map to NodeInfo:NodeType. */ + RDMA_NODE_IB_CA = 1, + RDMA_NODE_IB_SWITCH, + RDMA_NODE_IB_ROUTER, + RDMA_NODE_RNIC +}; + +enum rdma_transport_type { + RDMA_TRANSPORT_IB, + RDMA_TRANSPORT_IWARP +}; + +enum rdma_transport_type +rdma_node_get_transport(enum rdma_node_type node_type) __attribute_const__; + +enum ib_device_cap_flags { + IB_DEVICE_RESIZE_MAX_WR = 1, + IB_DEVICE_BAD_PKEY_CNTR = (1<<1), + IB_DEVICE_BAD_QKEY_CNTR = (1<<2), + IB_DEVICE_RAW_MULTI = (1<<3), + IB_DEVICE_AUTO_PATH_MIG = (1<<4), + IB_DEVICE_CHANGE_PHY_PORT = (1<<5), + IB_DEVICE_UD_AV_PORT_ENFORCE = (1<<6), + IB_DEVICE_CURR_QP_STATE_MOD = (1<<7), + IB_DEVICE_SHUTDOWN_PORT = (1<<8), + IB_DEVICE_INIT_TYPE = (1<<9), + IB_DEVICE_PORT_ACTIVE_EVENT = (1<<10), + IB_DEVICE_SYS_IMAGE_GUID = (1<<11), + IB_DEVICE_RC_RNR_NAK_GEN = (1<<12), + IB_DEVICE_SRQ_RESIZE = (1<<13), + IB_DEVICE_N_NOTIFY_CQ = (1<<14), + IB_DEVICE_ZERO_STAG = (1<<15), + IB_DEVICE_SEND_W_INV = (1<<16), + IB_DEVICE_MEM_WINDOW = (1<<17) +}; + +enum ib_atomic_cap { + IB_ATOMIC_NON, + IB_ATOMIC_HCA, + IB_ATOMIC_GLOB +}; + +struct ib_device_attr { + u64 fw_ver; + __be64 sys_image_guid; + u64 max_mr_size; + u64 page_size_cap; + u32 vendor_id; + u32 vendor_part_id; + u32 hw_ver; + int max_qp; + int max_qp_wr; + int device_cap_flags; + int max_sge; + int max_sge_rd; + int max_cq; + int max_cqe; + int max_mr; + int max_pd; + int max_qp_rd_atom; + int max_ee_rd_atom; + int max_res_rd_atom; + int max_qp_init_rd_atom; + int max_ee_init_rd_atom; + enum ib_atomic_cap atomic_cap; + int max_ee; + int max_rdd; + int max_mw; + int max_raw_ipv6_qp; + int max_raw_ethy_qp; + int max_mcast_grp; + int max_mcast_qp_attach; + int max_total_mcast_qp_attach; + int max_ah; + int max_fmr; + int max_map_per_fmr; + int max_srq; + int max_srq_wr; + int max_srq_sge; + u16 max_pkeys; + u8 local_ca_ack_delay; +}; + +enum ib_mtu { + IB_MTU_256 = 1, + IB_MTU_512 = 2, + IB_MTU_1024 = 
3, + IB_MTU_2048 = 4, + IB_MTU_4096 = 5 +}; + +static inline int ib_mtu_enum_to_int(enum ib_mtu mtu) +{ + switch (mtu) { + case IB_MTU_256: return 256; + case IB_MTU_512: return 512; + case IB_MTU_1024: return 1024; + case IB_MTU_2048: return 2048; + case IB_MTU_4096: return 4096; + default: return -1; + } +} + +enum ib_port_state { + IB_PORT_NOP = 0, + IB_PORT_DOWN = 1, + IB_PORT_INIT = 2, + IB_PORT_ARMED = 3, + IB_PORT_ACTIVE = 4, + IB_PORT_ACTIVE_DEFER = 5 +}; + +enum ib_port_cap_flags { + IB_PORT_SM = 1 << 1, + IB_PORT_NOTICE_SUP = 1 << 2, + IB_PORT_TRAP_SUP = 1 << 3, + IB_PORT_OPT_IPD_SUP = 1 << 4, + IB_PORT_AUTO_MIGR_SUP = 1 << 5, + IB_PORT_SL_MAP_SUP = 1 << 6, + IB_PORT_MKEY_NVRAM = 1 << 7, + IB_PORT_PKEY_NVRAM = 1 << 8, + IB_PORT_LED_INFO_SUP = 1 << 9, + IB_PORT_SM_DISABLED = 1 << 10, + IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11, + IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12, + IB_PORT_CM_SUP = 1 << 16, + IB_PORT_SNMP_TUNNEL_SUP = 1 << 17, + IB_PORT_REINIT_SUP = 1 << 18, + IB_PORT_DEVICE_MGMT_SUP = 1 << 19, + IB_PORT_VENDOR_CLASS_SUP = 1 << 20, + IB_PORT_DR_NOTICE_SUP = 1 << 21, + IB_PORT_CAP_MASK_NOTICE_SUP = 1 << 22, + IB_PORT_BOOT_MGMT_SUP = 1 << 23, + IB_PORT_LINK_LATENCY_SUP = 1 << 24, + IB_PORT_CLIENT_REG_SUP = 1 << 25 +}; + +enum ib_port_width { + IB_WIDTH_1X = 1, + IB_WIDTH_4X = 2, + IB_WIDTH_8X = 4, + IB_WIDTH_12X = 8 +}; + +static inline int ib_width_enum_to_int(enum ib_port_width width) +{ + switch (width) { + case IB_WIDTH_1X: return 1; + case IB_WIDTH_4X: return 4; + case IB_WIDTH_8X: return 8; + case IB_WIDTH_12X: return 12; + default: return -1; + } +} + +struct ib_port_attr { + enum ib_port_state state; + enum ib_mtu max_mtu; + enum ib_mtu active_mtu; + int gid_tbl_len; + u32 port_cap_flags; + u32 max_msg_sz; + u32 bad_pkey_cntr; + u32 qkey_viol_cntr; + u16 pkey_tbl_len; + u16 lid; + u16 sm_lid; + u8 lmc; + u8 max_vl_num; + u8 sm_sl; + u8 subnet_timeout; + u8 init_type_reply; + u8 active_width; + u8 active_speed; + u8 phys_state; +}; + +enum ib_device_modify_flags { + IB_DEVICE_MODIFY_SYS_IMAGE_GUID = 1 << 0, + IB_DEVICE_MODIFY_NODE_DESC = 1 << 1 +}; + +struct ib_device_modify { + u64 sys_image_guid; + char node_desc[64]; +}; + +enum ib_port_modify_flags { + IB_PORT_SHUTDOWN = 1, + IB_PORT_INIT_TYPE = (1<<2), + IB_PORT_RESET_QKEY_CNTR = (1<<3) +}; + +struct ib_port_modify { + u32 set_port_cap_mask; + u32 clr_port_cap_mask; + u8 init_type; +}; + +enum ib_event_type { + IB_EVENT_CQ_ERR, + IB_EVENT_QP_FATAL, + IB_EVENT_QP_REQ_ERR, + IB_EVENT_QP_ACCESS_ERR, + IB_EVENT_COMM_EST, + IB_EVENT_SQ_DRAINED, + IB_EVENT_PATH_MIG, + IB_EVENT_PATH_MIG_ERR, + IB_EVENT_DEVICE_FATAL, + IB_EVENT_PORT_ACTIVE, + IB_EVENT_PORT_ERR, + IB_EVENT_LID_CHANGE, + IB_EVENT_PKEY_CHANGE, + IB_EVENT_SM_CHANGE, + IB_EVENT_SRQ_ERR, + IB_EVENT_SRQ_LIMIT_REACHED, + IB_EVENT_QP_LAST_WQE_REACHED, + IB_EVENT_CLIENT_REREGISTER +}; + +struct ib_event { + struct ib_device *device; + union { + struct ib_cq *cq; + struct ib_qp *qp; + struct ib_srq *srq; + u8 port_num; + } element; + enum ib_event_type event; + struct ib_event_ex x; + }; + +struct ib_event_handler { + struct ib_device *device; + void (*handler)(struct ib_event_handler *, struct ib_event *); + struct list_head list; +}; + +#define INIT_IB_EVENT_HANDLER(_ptr, _device, _handler) \ + { \ + (_ptr)->device = _device; \ + (_ptr)->handler = _handler; \ + INIT_LIST_HEAD(&(_ptr)->list); \ + } + +struct ib_global_route { + union ib_gid dgid; + u32 flow_label; + u8 sgid_index; + u8 hop_limit; + u8 traffic_class; +}; + +struct ib_grh { + __be32 
version_tclass_flow; + __be16 paylen; + u8 next_hdr; + u8 hop_limit; + union ib_gid sgid; + union ib_gid dgid; +}; + +enum { + IB_MULTICAST_QPN = 0xffffff +}; + +#define XIB_LID_PERMISSIVE __constant_htons(0xFFFF) + +enum ib_ah_flags { + IB_AH_GRH = 1 +}; + +enum ib_rate { + IB_RATE_PORT_CURRENT = 0, + IB_RATE_2_5_GBPS = 2, + IB_RATE_5_GBPS = 5, + IB_RATE_10_GBPS = 3, + IB_RATE_20_GBPS = 6, + IB_RATE_30_GBPS = 4, + IB_RATE_40_GBPS = 7, + IB_RATE_60_GBPS = 8, + IB_RATE_80_GBPS = 9, + IB_RATE_120_GBPS = 10 +}; + +/** + * ib_rate_to_mult - Convert the IB rate enum to a multiple of the + * base rate of 2.5 Gbit/sec. For example, IB_RATE_5_GBPS will be + * converted to 2, since 5 Gbit/sec is 2 * 2.5 Gbit/sec. + * @rate: rate to convert. + */ +int ib_rate_to_mult(enum ib_rate rate) __attribute_const__; + +/** + * mult_to_ib_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate + * enum. + * @mult: multiple to convert. + */ +enum ib_rate mult_to_ib_rate(int mult) __attribute_const__; + +struct ib_ah_attr { + struct ib_global_route grh; + u16 dlid; + u8 sl; + u8 src_path_bits; + u8 static_rate; + u8 ah_flags; + u8 port_num; +}; + +enum ib_wc_status { + IB_WC_SUCCESS, + IB_WC_LOC_LEN_ERR, + IB_WC_LOC_QP_OP_ERR, + IB_WC_LOC_EEC_OP_ERR, + IB_WC_LOC_PROT_ERR, + IB_WC_WR_FLUSH_ERR, + IB_WC_MW_BIND_ERR, + IB_WC_BAD_RESP_ERR, + IB_WC_LOC_ACCESS_ERR, + IB_WC_REM_INV_REQ_ERR, + IB_WC_REM_ACCESS_ERR, + IB_WC_REM_OP_ERR, + IB_WC_RETRY_EXC_ERR, + IB_WC_RNR_RETRY_EXC_ERR, + IB_WC_LOC_RDD_VIOL_ERR, + IB_WC_REM_INV_RD_REQ_ERR, + IB_WC_REM_ABORT_ERR, + IB_WC_INV_EECN_ERR, + IB_WC_INV_EEC_STATE_ERR, + IB_WC_FATAL_ERR, + IB_WC_RESP_TIMEOUT_ERR, + IB_WC_GENERAL_ERR +}; + +enum ib_wc_opcode { + XIB_WC_SEND, + XIB_WC_RDMA_WRITE, + XIB_WC_RDMA_READ, + XIB_WC_COMP_SWAP, + XIB_WC_FETCH_ADD, + XIB_WC_BIND_MW, +/* + * Set value of XIB_WC_RECV so consumers can test if a completion is a + * receive by testing (opcode & XIB_WC_RECV). + */ + XIB_WC_RECV = 1 << 7, + XIB_WC_RECV_RDMA_WITH_IMM +}; + +enum ib_wc_flags { + IB_WC_GRH = 1, + IB_WC_WITH_IMM = (1<<1), + IB_WC_FORWARD = (1<<2) +}; + +struct ib_wc { + u64 wr_id; + enum ib_wc_status status; + enum ib_wc_opcode opcode; + u32 vendor_err; + u32 byte_len; + struct ib_qp *qp; + __be32 imm_data; + u32 src_qp; + int wc_flags; + u16 pkey_index; + u16 slid; + u8 sl; + u8 dlid_path_bits; + u8 port_num; /* valid only for DR SMPs on switches */ +}; + +enum ib_cq_notify_flags { + IB_CQ_SOLICITED = 1 << 0, + IB_CQ_NEXT_COMP = 1 << 1, + IB_CQ_SOLICITED_MASK = IB_CQ_SOLICITED | IB_CQ_NEXT_COMP, + IB_CQ_REPORT_MISSED_EVENTS = 1 << 2, +}; + +enum ib_srq_attr_mask { + XIB_SRQ_MAX_WR = 1 << 0, + XIB_SRQ_LIMIT = 1 << 1, +}; + +struct ib_srq_attr { + u32 max_wr; + u32 max_sge; + u32 srq_limit; +}; + +struct ib_srq_init_attr { + void (*event_handler)(struct ib_event *, void *); + void *srq_context; + struct ib_srq_attr attr; +}; + +struct ib_qp_cap { + u32 max_send_wr; + u32 max_recv_wr; + u32 max_send_sge; + u32 max_recv_sge; + u32 max_inline_data; +}; + +enum ib_sig_type { + IB_SIGNAL_ALL_WR, + IB_SIGNAL_REQ_WR +}; + +enum ib_qp_type { + /* + * IB_QPT_SMI and IB_QPT_GSI have to be the first two entries + * here (and in that order) since the MAD layer uses them as + * indices into a 2-entry table. 
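+ *
+ * In other words, IB_QPT_SMI == 0 selects the QP0 (SMI) entry and
+ * IB_QPT_GSI == 1 selects the QP1 (GSI) entry of such a two-entry table.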
+ */ + IB_QPT_SMI, + IB_QPT_GSI, + + IB_QPT_RC, + IB_QPT_UC, + IB_QPT_UD, + IB_QPT_RAW_IP_V6, + IB_QPT_RAW_ETY +}; + +struct ib_qp_init_attr { + void (*event_handler)(struct ib_event *, void *); + void *qp_context; + struct ib_cq *send_cq; + struct ib_cq *recv_cq; + struct ib_srq *srq; + struct ib_qp_cap cap; + enum ib_sig_type sq_sig_type; + enum ib_qp_type qp_type; + u8 port_num; /* special QP types only */ +}; + +enum ib_rnr_timeout { + IB_RNR_TIMER_655_36 = 0, + IB_RNR_TIMER_000_01 = 1, + IB_RNR_TIMER_000_02 = 2, + IB_RNR_TIMER_000_03 = 3, + IB_RNR_TIMER_000_04 = 4, + IB_RNR_TIMER_000_06 = 5, + IB_RNR_TIMER_000_08 = 6, + IB_RNR_TIMER_000_12 = 7, + IB_RNR_TIMER_000_16 = 8, + IB_RNR_TIMER_000_24 = 9, + IB_RNR_TIMER_000_32 = 10, + IB_RNR_TIMER_000_48 = 11, + IB_RNR_TIMER_000_64 = 12, + IB_RNR_TIMER_000_96 = 13, + IB_RNR_TIMER_001_28 = 14, + IB_RNR_TIMER_001_92 = 15, + IB_RNR_TIMER_002_56 = 16, + IB_RNR_TIMER_003_84 = 17, + IB_RNR_TIMER_005_12 = 18, + IB_RNR_TIMER_007_68 = 19, + IB_RNR_TIMER_010_24 = 20, + IB_RNR_TIMER_015_36 = 21, + IB_RNR_TIMER_020_48 = 22, + IB_RNR_TIMER_030_72 = 23, + IB_RNR_TIMER_040_96 = 24, + IB_RNR_TIMER_061_44 = 25, + IB_RNR_TIMER_081_92 = 26, + IB_RNR_TIMER_122_88 = 27, + IB_RNR_TIMER_163_84 = 28, + IB_RNR_TIMER_245_76 = 29, + IB_RNR_TIMER_327_68 = 30, + IB_RNR_TIMER_491_52 = 31 +}; + +enum ib_qp_attr_mask { + IB_QP_STATE = 1, + IB_QP_CUR_STATE = (1<<1), + IB_QP_EN_SQD_ASYNC_NOTIFY = (1<<2), + IB_QP_ACCESS_FLAGS = (1<<3), + IB_QP_PKEY_INDEX = (1<<4), + IB_QP_PORT = (1<<5), + IB_QP_QKEY = (1<<6), + IB_QP_AV = (1<<7), + IB_QP_PATH_MTU = (1<<8), + IB_QP_TIMEOUT = (1<<9), + IB_QP_RETRY_CNT = (1<<10), + IB_QP_RNR_RETRY = (1<<11), + IB_QP_RQ_PSN = (1<<12), + IB_QP_MAX_QP_RD_ATOMIC = (1<<13), + IB_QP_ALT_PATH = (1<<14), + IB_QP_MIN_RNR_TIMER = (1<<15), + IB_QP_SQ_PSN = (1<<16), + IB_QP_MAX_DEST_RD_ATOMIC = (1<<17), + IB_QP_PATH_MIG_STATE = (1<<18), + IB_QP_CAP = (1<<19), + IB_QP_DEST_QPN = (1<<20) +}; + +enum ib_qp_state { + XIB_QPS_RESET, + XIB_QPS_INIT, + XIB_QPS_RTR, + XIB_QPS_RTS, + XIB_QPS_SQD, + XIB_QPS_SQE, + XIB_QPS_ERR +}; + +enum ib_mig_state { + IB_MIG_MIGRATED, + IB_MIG_REARM, + IB_MIG_ARMED +}; + +struct ib_qp_attr { + enum ib_qp_state qp_state; + enum ib_qp_state cur_qp_state; + enum ib_mtu path_mtu; + enum ib_mig_state path_mig_state; + u32 qkey; + u32 rq_psn; + u32 sq_psn; + u32 dest_qp_num; + int qp_access_flags; + struct ib_qp_cap cap; + struct ib_ah_attr ah_attr; + struct ib_ah_attr alt_ah_attr; + u16 pkey_index; + u16 alt_pkey_index; + u8 en_sqd_async_notify; + u8 sq_draining; + u8 max_rd_atomic; + u8 max_dest_rd_atomic; + u8 min_rnr_timer; + u8 port_num; + u8 timeout; + u8 retry_cnt; + u8 rnr_retry; + u8 alt_port_num; + u8 alt_timeout; +}; + +enum ib_wr_opcode { + IB_WR_RDMA_WRITE, + IB_WR_RDMA_WRITE_WITH_IMM, + IB_WR_SEND, + IB_WR_SEND_WITH_IMM, + IB_WR_RDMA_READ, + IB_WR_ATOMIC_CMP_AND_SWP, + IB_WR_ATOMIC_FETCH_AND_ADD +}; + +enum ib_send_flags { + IB_SEND_FENCE = 1, + IB_SEND_SIGNALED = (1<<1), + IB_SEND_SOLICITED = (1<<2), + IB_SEND_INLINE = (1<<3) +}; + +struct ib_sge { + u64 addr; + u32 length; + u32 lkey; +}; + +struct ib_send_wr { + struct ib_send_wr *next; + u64 wr_id; + struct ib_sge *sg_list; + int num_sge; + enum ib_wr_opcode opcode; + int send_flags; + __be32 imm_data; + union { + struct { + u64 remote_addr; + u32 rkey; + } rdma; + struct { + u64 remote_addr; + u64 compare_add; + u64 swap; + u32 rkey; + } atomic; + struct { + struct ib_ah *ah; + u32 remote_qpn; + u32 remote_qkey; + u16 pkey_index; /* valid for GSI only */ + u8 port_num; 
/* valid for DR SMPs on switch only */ + } ud; + } wr; +}; + +struct ib_recv_wr { + struct ib_recv_wr *next; + u64 wr_id; + struct ib_sge *sg_list; + int num_sge; +}; + +enum ib_access_flags { + IB_ACCESS_LOCAL_WRITE = 1, + IB_ACCESS_REMOTE_WRITE = (1<<1), + IB_ACCESS_REMOTE_READ = (1<<2), + IB_ACCESS_REMOTE_ATOMIC = (1<<3), + IB_ACCESS_MW_BIND = (1<<4) +}; + +struct ib_phys_buf { + u64 addr; + u64 size; +}; + +struct ib_mr_attr { + struct ib_pd *pd; + u64 device_virt_addr; + u64 size; + int mr_access_flags; + u32 lkey; + u32 rkey; +}; + +enum ib_mr_rereg_flags { + IB_MR_REREG_TRANS = 1, + IB_MR_REREG_PD = (1<<1), + IB_MR_REREG_ACCESS = (1<<2) +}; + +struct ib_mw_bind { + struct ib_mr *mr; + u64 wr_id; + u64 addr; + u32 length; + int send_flags; + int mw_access_flags; +}; + +struct ib_fmr_attr { + int max_pages; + int max_maps; + u8 page_shift; +}; +struct ib_ucontext { + struct ib_device *device; + int closing; + struct ib_ucontext_ex x; +}; + +struct ib_udata { + void *inbuf; + void *outbuf; + size_t inlen; + size_t outlen; +}; + +#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \ + { \ + (udata)->inbuf = (void *) (ibuf); \ + (udata)->outbuf = (void *) (obuf); \ + (udata)->inlen = (ilen); \ + (udata)->outlen = (olen); \ + } + +struct ib_pd { + struct ib_device *device; + struct ib_ucontext *p_uctx; + atomic_t usecnt; /* count all resources */ +}; + +struct ib_ah { + struct ib_device *device; + struct ib_pd *pd; + struct ib_ucontext *p_uctx; +}; + +typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context); + +struct ib_cq { + struct ib_device *device; + struct ib_ucontext *p_uctx; + ib_comp_handler comp_handler; + void (*event_handler)(struct ib_event *, void *); + void * cq_context; + int cqe; + atomic_t usecnt; /* count number of work queues */ + struct ib_cq_ex x; +}; + +struct ib_srq { + struct ib_device *device; + struct ib_pd *pd; + struct ib_ucontext *p_uctx; + void (*event_handler)(struct ib_event *, void *); + void *srq_context; + atomic_t usecnt; + struct ib_srq_ex x; +}; + +struct ib_qp { + struct ib_device *device; + struct ib_pd *pd; + struct ib_cq *send_cq; + struct ib_cq *recv_cq; + struct ib_srq *srq; + struct ib_ucontext *p_uctx; + void (*event_handler)(struct ib_event *, void *); + void *qp_context; + u32 qp_num; + enum ib_qp_type qp_type; + struct ib_qp_ex x; +}; + +struct ib_mr { + struct ib_device *device; + struct ib_pd *pd; + struct ib_ucontext *p_uctx; + u32 lkey; + u32 rkey; + atomic_t usecnt; /* count number of MWs */ +}; + +struct ib_mw { + struct ib_device *device; + struct ib_pd *pd; + struct ib_ucontext *p_uctx; + u32 rkey; +}; + +struct ib_fmr { + struct ib_device *device; + struct ib_pd *pd; + struct list_head list; + u32 lkey; + u32 rkey; +}; + +struct ib_mad; +struct ib_grh; + +enum ib_process_mad_flags { + IB_MAD_IGNORE_MKEY = 1, + IB_MAD_IGNORE_BKEY = 2, + IB_MAD_IGNORE_ALL = IB_MAD_IGNORE_MKEY | IB_MAD_IGNORE_BKEY +}; + +enum ib_mad_result { + IB_MAD_RESULT_FAILURE = 0, /* (!SUCCESS is the important flag) */ + IB_MAD_RESULT_SUCCESS = 1 << 0, /* MAD was successfully processed */ + IB_MAD_RESULT_REPLY = 1 << 1, /* Reply packet needs to be sent */ + IB_MAD_RESULT_CONSUMED = 1 << 2 /* Packet consumed: stop processing */ +}; + +#define IB_DEVICE_NAME_MAX 64 + +struct ib_cache { + rwlock_t lock; + struct ib_event_handler event_handler; + struct ib_pkey_cache **pkey_cache; + struct ib_gid_cache **gid_cache; + u8 *lmc_cache; +}; + +struct ib_dma_mapping_ops { + int (*mapping_error)(struct ib_device *dev, + u64 dma_addr); + u64 (*map_single)(struct 
ib_device *dev, + void *ptr, size_t size, + enum dma_data_direction direction); + void (*unmap_single)(struct ib_device *dev, + u64 addr, size_t size, + enum dma_data_direction direction); + u64 (*map_page)(struct ib_device *dev, + dma_addr_t page, unsigned long offset, + size_t size, + enum dma_data_direction direction); + void (*unmap_page)(struct ib_device *dev, + u64 addr, size_t size, + enum dma_data_direction direction); + int (*map_sg)(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction); + void (*unmap_sg)(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction); + u64 (*dma_address)(struct ib_device *dev, + struct scatterlist *sg); + unsigned int (*dma_len)(struct ib_device *dev, + struct scatterlist *sg); + void (*sync_single_for_cpu)(struct ib_device *dev, + u64 dma_handle, + size_t size, + enum dma_data_direction dir); + void (*sync_single_for_device)(struct ib_device *dev, + u64 dma_handle, + size_t size, + enum dma_data_direction dir); + void *(*alloc_coherent)(struct ib_device *dev, + size_t size, + u64 *dma_handle, + gfp_t flag); + void (*free_coherent)(struct ib_device *dev, + size_t size, void *cpu_addr, + u64 dma_handle); +}; + +struct iw_cm_verbs; + +struct ib_device { + struct mlx4_dev *dma_device; + + char name[IB_DEVICE_NAME_MAX]; + + struct list_head event_handler_list; + spinlock_t event_handler_lock; + + struct list_head core_list; + struct list_head client_data_list; + spinlock_t client_data_lock; + + struct ib_cache cache; + int *pkey_tbl_len; + int *gid_tbl_len; + + u32 flags; + + int num_comp_vectors; + + struct iw_cm_verbs *iwcm; + + int (*query_device)(struct ib_device *device, + struct ib_device_attr *device_attr); + int (*query_port)(struct ib_device *device, + u8 port_num, + struct ib_port_attr *port_attr); + int (*query_gid)(struct ib_device *device, + u8 port_num, int index, + union ib_gid *gid); + int (*query_pkey)(struct ib_device *device, + u8 port_num, u16 index, u16 *pkey); + int (*modify_device)(struct ib_device *device, + int device_modify_mask, + struct ib_device_modify *device_modify); + int (*modify_port)(struct ib_device *device, + u8 port_num, int port_modify_mask, + struct ib_port_modify *port_modify); + struct ib_ucontext * (*alloc_ucontext)(struct ib_device *device, + struct ib_udata *udata); + int (*dealloc_ucontext)(struct ib_ucontext *context); + int (*mmap)(struct ib_ucontext *context, + struct vm_area_struct *vma); + struct ib_pd * (*alloc_pd)(struct ib_device *device, + struct ib_ucontext *context, + struct ib_udata *udata); + int (*dealloc_pd)(struct ib_pd *pd); + struct ib_ah * (*create_ah)(struct ib_pd *pd, + struct ib_ah_attr *ah_attr); + int (*modify_ah)(struct ib_ah *ah, + struct ib_ah_attr *ah_attr); + int (*query_ah)(struct ib_ah *ah, + struct ib_ah_attr *ah_attr); + int (*destroy_ah)(struct ib_ah *ah); + struct ib_srq * (*create_srq)(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata); + int (*modify_srq)(struct ib_srq *srq, + struct ib_srq_attr *srq_attr, + enum ib_srq_attr_mask srq_attr_mask, + struct ib_udata *udata); + int (*query_srq)(struct ib_srq *srq, + struct ib_srq_attr *srq_attr); + int (*destroy_srq)(struct ib_srq *srq); + int (*post_srq_recv)(struct ib_srq *srq, + ib_recv_wr_t *recv_wr, + ib_recv_wr_t **bad_recv_wr); + struct ib_qp * (*create_qp)(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + struct ib_udata *udata); + int (*modify_qp)(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, 
+ int qp_attr_mask, + struct ib_udata *udata); + int (*query_qp)(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr); + int (*destroy_qp)(struct ib_qp *qp); + int (*post_send)(struct ib_qp *qp, + ib_send_wr_t *send_wr, + ib_send_wr_t **bad_send_wr); + int (*post_recv)(struct ib_qp *qp, + ib_recv_wr_t *recv_wr, + ib_recv_wr_t **bad_recv_wr); + struct ib_cq * (*create_cq)(struct ib_device *device, int cqe, + int comp_vector, + struct ib_ucontext *context, + struct ib_udata *udata); + int (*modify_cq)(struct ib_cq *cq, u16 cq_count, + u16 cq_period); + int (*destroy_cq)(struct ib_cq *cq); + int (*resize_cq)(struct ib_cq *cq, int cqe, + struct ib_udata *udata); + int (*poll_cq)(struct ib_cq *ibcq, + ib_wc_t** const pp_free_wclist, ib_wc_t** const pp_done_wclist); + int (*peek_cq)(struct ib_cq *cq, int wc_cnt); + int (*req_notify_cq)(struct ib_cq *cq, + enum ib_cq_notify_flags flags); + int (*req_ncomp_notif)(struct ib_cq *cq, + int wc_cnt); + struct ib_mr * (*get_dma_mr)(struct ib_pd *pd, + int mr_access_flags); + struct ib_mr * (*reg_phys_mr)(struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, + u64 *iova_start); + struct ib_mr * (*reg_user_mr)(struct ib_pd *pd, + u64 start, u64 length, + u64 virt_addr, + int mr_access_flags, + struct ib_udata *udata); + int (*query_mr)(struct ib_mr *mr, + struct ib_mr_attr *mr_attr); + int (*dereg_mr)(struct ib_mr *mr); + int (*rereg_phys_mr)(struct ib_mr *mr, + int mr_rereg_mask, + struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, + u64 *iova_start); + struct ib_mw * (*alloc_mw)(struct ib_pd *pd); + int (*bind_mw)(struct ib_qp *qp, + struct ib_mw *mw, + struct ib_mw_bind *mw_bind); + int (*dealloc_mw)(struct ib_mw *mw); + struct ib_fmr * (*alloc_fmr)(struct ib_pd *pd, + int mr_access_flags, + struct ib_fmr_attr *fmr_attr); + int (*map_phys_fmr)(struct ib_fmr *fmr, + u64 *page_list, int list_len, + u64 iova); + int (*unmap_fmr)(struct list_head *fmr_list); + int (*dealloc_fmr)(struct ib_fmr *fmr); + int (*attach_mcast)(struct ib_qp *qp, + union ib_gid *gid, + u16 lid); + int (*detach_mcast)(struct ib_qp *qp, + union ib_gid *gid, + u16 lid); + int (*process_mad)(struct ib_device *device, + int process_mad_flags, + u8 port_num, + ib_wc_t *in_wc, + struct ib_grh *in_grh, + struct ib_mad *in_mad, + struct ib_mad *out_mad); + + struct ib_dma_mapping_ops *dma_ops; + struct list_head port_list; + + enum { + IB_DEV_UNINITIALIZED, + IB_DEV_REGISTERED, + IB_DEV_UNREGISTERED + } reg_state; + + u64 uverbs_cmd_mask; + int uverbs_abi_ver; + + char node_desc[64]; + __be64 node_guid; + u8 node_type; + u8 phys_port_cnt; + struct ib_device_ex x; +}; + +struct ib_client { + char *name; + void (*add) (struct ib_device *); + void (*remove)(struct ib_device *); + + struct list_head list; +}; + +struct ib_device *ib_alloc_device(size_t size); +void ib_dealloc_device(struct ib_device *device); + +int ib_register_device (struct ib_device *device); +void ib_unregister_device(struct ib_device *device); + +int ib_register_client (struct ib_client *client); +void ib_unregister_client(struct ib_client *client); + +void *ib_get_client_data(struct ib_device *device, struct ib_client *client); +void ib_set_client_data(struct ib_device *device, struct ib_client *client, + void *data); + +static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len) +{ + if (len > udata->inlen) + return -EFAULT; + memcpy(dest, 
udata->inbuf, len); + return 0; +} + +static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len) +{ + if (len > udata->outlen) + return -EFAULT; + memcpy(udata->outbuf, src, len); + return 0; +} + +/** + * ib_modify_qp_is_ok - Check that the supplied attribute mask + * contains all required attributes and no attributes not allowed for + * the given QP state transition. + * @cur_state: Current QP state + * @next_state: Next QP state + * @type: QP type + * @mask: Mask of supplied QP attributes + * + * This function is a helper function that a low-level driver's + * modify_qp method can use to validate the consumer's input. It + * checks that cur_state and next_state are valid QP states, that a + * transition from cur_state to next_state is allowed by the IB spec, + * and that the attribute mask supplied is allowed for the transition. + */ +int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, + enum ib_qp_type type, enum ib_qp_attr_mask mask); + +int ib_register_event_handler (struct ib_event_handler *event_handler); +int ib_unregister_event_handler(struct ib_event_handler *event_handler); +void ib_dispatch_event(struct ib_event *event); + +int ib_query_device(struct ib_device *device, + struct ib_device_attr *device_attr); + +int ib_query_port(struct ib_device *device, + u8 port_num, struct ib_port_attr *port_attr); + +int ib_query_gid(struct ib_device *device, + u8 port_num, int index, union ib_gid *gid); + +int ib_query_pkey(struct ib_device *device, + u8 port_num, u16 index, u16 *pkey); + +int ib_modify_device(struct ib_device *device, + int device_modify_mask, + struct ib_device_modify *device_modify); + +int ib_modify_port(struct ib_device *device, + u8 port_num, int port_modify_mask, + struct ib_port_modify *port_modify); + +int ib_find_gid(struct ib_device *device, union ib_gid *gid, + u8 *port_num, u16 *index); + +int ib_find_pkey(struct ib_device *device, + u8 port_num, u16 pkey, u16 *index); + +/** + * ib_alloc_pd - Allocates an unused protection domain. + * @device: The device on which to allocate the protection domain. + * + * A protection domain object provides an association between QPs, shared + * receive queues, address handles, memory regions, and memory windows. + */ +struct ib_pd *ib_alloc_pd(struct ib_device *device); + +/** + * ib_dealloc_pd - Deallocates a protection domain. + * @pd: The protection domain to deallocate. + */ +int ib_dealloc_pd(struct ib_pd *pd); + +/** + * ib_create_ah - Creates an address handle for the given address vector. + * @pd: The protection domain associated with the address handle. + * @ah_attr: The attributes of the address vector. + * + * The address handle is used to reference a local or global destination + * in all UD QP post sends. + */ +struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); + +/** + * ib_init_ah_from_wc - Initializes address handle attributes from a + * work completion. + * @device: Device on which the received message arrived. + * @port_num: Port on which the received message arrived. + * @wc: Work completion associated with the received message. + * @grh: References the received global route header. This parameter is + * ignored unless the work completion indicates that the GRH is valid. + * @ah_attr: Returned attributes that can be used when creating an address + * handle for replying to the message. 
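+ *
+ * A typical UD reply path, sketched under the assumption that @wc and @grh
+ * come from a just-received datagram, that pd is the caller's protection
+ * domain, and that the routine follows the usual 0-on-success return
+ * convention (error handling is left to the caller):
+ *
+ *	struct ib_ah_attr ah_attr;
+ *	struct ib_ah *ah = NULL;
+ *
+ *	if (!ib_init_ah_from_wc(device, port_num, wc, grh, &ah_attr))
+ *		ah = ib_create_ah(pd, &ah_attr);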
+ */ +int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, ib_wc_t *wc, + struct ib_grh *grh, struct ib_ah_attr *ah_attr); + +/** + * ib_create_ah_from_wc - Creates an address handle associated with the + * sender of the specified work completion. + * @pd: The protection domain associated with the address handle. + * @wc: Work completion information associated with a received message. + * @grh: References the received global route header. This parameter is + * ignored unless the work completion indicates that the GRH is valid. + * @port_num: The outbound port number to associate with the address. + * + * The address handle is used to reference a local or global destination + * in all UD QP post sends. + */ +struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, ib_wc_t *wc, + struct ib_grh *grh, u8 port_num); + +/** + * ib_modify_ah - Modifies the address vector associated with an address + * handle. + * @ah: The address handle to modify. + * @ah_attr: The new address vector attributes to associate with the + * address handle. + */ +int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); + +/** + * ib_query_ah - Queries the address vector associated with an address + * handle. + * @ah: The address handle to query. + * @ah_attr: The address vector attributes associated with the address + * handle. + */ +int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); + +/** + * ib_destroy_ah - Destroys an address handle. + * @ah: The address handle to destroy. + */ +int ib_destroy_ah(struct ib_ah *ah); + +/** + * ib_create_srq - Creates a SRQ associated with the specified protection + * domain. + * @pd: The protection domain associated with the SRQ. + * @srq_init_attr: A list of initial attributes required to create the + * SRQ. If SRQ creation succeeds, then the attributes are updated to + * the actual capabilities of the created SRQ. + * + * srq_attr->max_wr and srq_attr->max_sge are read the determine the + * requested size of the SRQ, and set to the actual values allocated + * on return. If ib_create_srq() succeeds, then max_wr and max_sge + * will always be at least as large as the requested values. + */ +struct ib_srq *ib_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr); + +/** + * ib_modify_srq - Modifies the attributes for the specified SRQ. + * @srq: The SRQ to modify. + * @srq_attr: On input, specifies the SRQ attributes to modify. On output, + * the current values of selected SRQ attributes are returned. + * @srq_attr_mask: A bit-mask used to specify which attributes of the SRQ + * are being modified. + * + * The mask may contain XIB_SRQ_MAX_WR to resize the SRQ and/or + * XIB_SRQ_LIMIT to set the SRQ's limit and request notification when + * the number of receives queued drops below the limit. + */ +int ib_modify_srq(struct ib_srq *srq, + struct ib_srq_attr *srq_attr, + enum ib_srq_attr_mask srq_attr_mask); + +/** + * ib_query_srq - Returns the attribute list and current values for the + * specified SRQ. + * @srq: The SRQ to query. + * @srq_attr: The attributes of the specified SRQ. + */ +int ib_query_srq(struct ib_srq *srq, + struct ib_srq_attr *srq_attr); + +/** + * ib_destroy_srq - Destroys the specified SRQ. + * @srq: The SRQ to destroy. + */ +int ib_destroy_srq(struct ib_srq *srq); + +/** + * ib_post_srq_recv - Posts a list of work requests to the specified SRQ. + * @srq: The SRQ to post the work request on. + * @recv_wr: A list of work requests to post on the receive queue. 
+ * @bad_recv_wr: On an immediate failure, this parameter will reference
+ * the work request that failed to be posted on the QP.
+ */
+static inline int ib_post_srq_recv(struct ib_srq *srq,
+        ib_recv_wr_t *recv_wr,
+        ib_recv_wr_t **bad_recv_wr)
+{
+    return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr);
+}
+
+/**
+ * ib_create_qp - Creates a QP associated with the specified protection
+ * domain.
+ * @pd: The protection domain associated with the QP.
+ * @qp_init_attr: A list of initial attributes required to create the
+ * QP. If QP creation succeeds, then the attributes are updated to
+ * the actual capabilities of the created QP.
+ */
+struct ib_qp *ib_create_qp(struct ib_pd *pd,
+        struct ib_qp_init_attr *qp_init_attr);
+
+/**
+ * ib_modify_qp - Modifies the attributes for the specified QP and then
+ * transitions the QP to the given state.
+ * @qp: The QP to modify.
+ * @qp_attr: On input, specifies the QP attributes to modify. On output,
+ * the current values of selected QP attributes are returned.
+ * @qp_attr_mask: A bit-mask used to specify which attributes of the QP
+ * are being modified.
+ */
+int ib_modify_qp(struct ib_qp *qp,
+        struct ib_qp_attr *qp_attr,
+        int qp_attr_mask);
+
+/**
+ * ib_query_qp - Returns the attribute list and current values for the
+ * specified QP.
+ * @qp: The QP to query.
+ * @qp_attr: The attributes of the specified QP.
+ * @qp_attr_mask: A bit-mask used to select specific attributes to query.
+ * @qp_init_attr: Additional attributes of the selected QP.
+ *
+ * The qp_attr_mask may be used to limit the query to gathering only the
+ * selected attributes.
+ */
+int ib_query_qp(struct ib_qp *qp,
+        struct ib_qp_attr *qp_attr,
+        int qp_attr_mask,
+        struct ib_qp_init_attr *qp_init_attr);
+
+/**
+ * ib_modify_cq - Modifies moderation params of the CQ
+ * @cq: The CQ to modify.
+ * @cq_count: number of CQEs that will trigger an event
+ * @cq_period: max period of time before triggering an event
+ *
+ * Users can examine the cq structure to determine the actual CQ size.
+ */
+int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
+
+/**
+ * ib_destroy_qp - Destroys the specified QP.
+ * @qp: The QP to destroy.
+ */
+int ib_destroy_qp(struct ib_qp *qp);
+
+/**
+ * ib_post_send - Posts a list of work requests to the send queue of
+ * the specified QP.
+ * @qp: The QP to post the work request on.
+ * @send_wr: A list of work requests to post on the send queue.
+ * @bad_send_wr: On an immediate failure, this parameter will reference
+ * the work request that failed to be posted on the QP.
+ */
+static inline int ib_post_send(struct ib_qp *qp,
+        ib_send_wr_t *send_wr,
+        ib_send_wr_t **bad_send_wr)
+{
+    return qp->device->post_send(qp, send_wr, bad_send_wr);
+}
+
+/**
+ * ib_post_recv - Posts a list of work requests to the receive queue of
+ * the specified QP.
+ * @qp: The QP to post the work request on.
+ * @recv_wr: A list of work requests to post on the receive queue.
+ * @bad_recv_wr: On an immediate failure, this parameter will reference
+ * the work request that failed to be posted on the QP.
+ */
+static inline int ib_post_recv(struct ib_qp *qp,
+        ib_recv_wr_t *recv_wr,
+        ib_recv_wr_t **bad_recv_wr)
+{
+    return qp->device->post_recv(qp, recv_wr, bad_recv_wr);
+}
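+
+/*
+ * Editor's usage sketch (illustration only, not part of the original verbs
+ * API): posting a single receive work request through ib_post_recv().  The
+ * ib_recv_wr_t / ib_local_ds_t field names follow the IBAL definitions in
+ * ib_types.h and are assumptions here; buf/len/lkey stand for an already
+ * registered buffer.
+ */
+static inline int sample_post_one_recv(struct ib_qp *qp, u64 buf, u32 len, u32 lkey)
+{
+    ib_local_ds_t ds;
+    ib_recv_wr_t wr, *bad_wr = NULL;
+
+    ds.vaddr = buf;        /* registered buffer address */
+    ds.length = len;       /* buffer length in bytes */
+    ds.lkey = lkey;        /* local key of the MR covering buf */
+
+    wr.p_next = NULL;      /* single request, no chain */
+    wr.wr_id = buf;        /* anything that identifies the buffer */
+    wr.num_ds = 1;
+    wr.ds_array = &ds;
+
+    return ib_post_recv(qp, &wr, &bad_wr);
+}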
+
+/**
+ * ib_create_cq - Creates a CQ on the specified device.
+ * @device: The device on which to create the CQ.
+ * @comp_handler: A user-specified callback that is invoked when a
+ * completion event occurs on the CQ.
+ * @event_handler: A user-specified callback that is invoked when an
+ * asynchronous event not associated with a completion occurs on the CQ.
+ * @cq_context: Context associated with the CQ returned to the user via
+ * the associated completion and event handlers.
+ * @cqe: The minimum size of the CQ.
+ * @comp_vector: Completion vector used to signal completion events.
+ * Must be >= 0 and < context->num_comp_vectors.
+ *
+ * Users can examine the cq structure to determine the actual CQ size.
+ */
+struct ib_cq *ib_create_cq(struct ib_device *device,
+        ib_comp_handler comp_handler,
+        void (*event_handler)(struct ib_event *, void *),
+        void *cq_context, int cqe, int comp_vector);
+
+/**
+ * ib_resize_cq - Modifies the capacity of the CQ.
+ * @cq: The CQ to resize.
+ * @cqe: The minimum size of the CQ.
+ *
+ * Users can examine the cq structure to determine the actual CQ size.
+ */
+int ib_resize_cq(struct ib_cq *cq, int cqe);
+
+/**
+ * ib_destroy_cq - Destroys the specified CQ.
+ * @cq: The CQ to destroy.
+ */
+int ib_destroy_cq(struct ib_cq *cq);
+
+/**
+ * ib_poll_cq - poll a CQ for completion(s)
+ * @cq: the CQ being polled
+ * @pp_free_wclist:
+ * On input, a list of work completion structures provided by
+ * the client. These are used to report completed work requests through
+ * the pp_done_wclist.
+ *
+ * On output, this contains the list of work completion structures for
+ * which no work completion was found.
+ * @pp_done_wclist: A list of work completions retrieved from the completion queue.
+ *
+ * Poll a CQ for (possibly multiple) completions. If the return value
+ * is < 0, an error occurred. If the return value is >= 0, it is the
+ * number of completions returned. If the return value is
+ * non-negative and less than the number of entries supplied in
+ * pp_free_wclist, then the CQ was emptied.
+ */
+static inline int ib_poll_cq(struct ib_cq *cq, ib_wc_t** const pp_free_wclist,
+        ib_wc_t** const pp_done_wclist)
+{
+    return cq->device->poll_cq(cq, pp_free_wclist, pp_done_wclist);
+}
+
+/**
+ * ib_peek_cq - Returns the number of unreaped completions currently
+ * on the specified CQ.
+ * @cq: The CQ to peek.
+ * @wc_cnt: A minimum number of unreaped completions to check for.
+ *
+ * If the number of unreaped completions is greater than or equal to wc_cnt,
+ * this function returns wc_cnt, otherwise, it returns the actual number of
+ * unreaped completions.
+ */
+int ib_peek_cq(struct ib_cq *cq, int wc_cnt);
+
+/**
+ * ib_req_notify_cq - Request completion notification on a CQ.
+ * @cq: The CQ to generate an event for.
+ * @flags:
+ * Must contain exactly one of %IB_CQ_SOLICITED or %IB_CQ_NEXT_COMP
+ * to request an event on the next solicited event or next work
+ * completion of any type, respectively. %IB_CQ_REPORT_MISSED_EVENTS
+ * may also be |ed in to request a hint about missed events, as
+ * described below.
+ *
+ * Return Value:
+ * < 0 means an error occurred while requesting notification
+ * == 0 means notification was requested successfully, and if
+ * IB_CQ_REPORT_MISSED_EVENTS was passed in, then no events
+ * were missed and it is safe to wait for another event. In
+ * this case it is guaranteed that any work completions added
+ * to the CQ since the last CQ poll will trigger a completion
+ * notification event.
+ * > 0 is only returned if IB_CQ_REPORT_MISSED_EVENTS was passed
+ * in. It means that the consumer must poll the CQ again to
+ * make sure it is empty to avoid missing an event because of a
+ * race between requesting notification and an entry being
+ * added to the CQ.
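+
+/*
+ * Editor's sketch of the re-poll pattern described above (illustration only,
+ * not part of the original header): after draining the CQ, re-arm it with
+ * IB_CQ_REPORT_MISSED_EVENTS and drain again whenever the call returns > 0.
+ * drain_cq is a placeholder for a caller-supplied loop around ib_poll_cq();
+ * the device's req_notify_cq method is called directly here, which is what
+ * the ib_req_notify_cq() wrapper below does as well.
+ */
+static inline void sample_rearm_cq(struct ib_cq *cq,
+        void (*drain_cq)(struct ib_cq *))
+{
+    drain_cq(cq);
+    while (cq->device->req_notify_cq(cq,
+        IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS) > 0)
+        drain_cq(cq);
+}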
This return value means it is possible + * (but not guaranteed) that a work completion has been added + * to the CQ since the last poll without triggering a + * completion notification event. + */ +static inline int ib_req_notify_cq(struct ib_cq *cq, + enum ib_cq_notify_flags flags) +{ + return cq->device->req_notify_cq(cq, flags); +} + +/** + * ib_req_ncomp_notif - Request completion notification when there are + * at least the specified number of unreaped completions on the CQ. + * @cq: The CQ to generate an event for. + * @wc_cnt: The number of unreaped completions that should be on the + * CQ before an event is generated. + */ +static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) +{ + return cq->device->req_ncomp_notif ? + cq->device->req_ncomp_notif(cq, wc_cnt) : + -ENOSYS; +} + +/** + * ib_get_dma_mr - Returns a memory region for system memory that is + * usable for DMA. + * @pd: The protection domain associated with the memory region. + * @mr_access_flags: Specifies the memory access rights. + * + * Note that the ib_dma_*() functions defined below must be used + * to create/destroy addresses used with the Lkey or Rkey returned + * by ib_get_dma_mr(). + */ +struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags); + +#if 0 +// TODO: do we need that +/** + * ib_dma_mapping_error - check a DMA addr for error + * @dev: The device for which the dma_addr was created + * @dma_addr: The DMA address to check + */ +static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr) +{ + if (dev->dma_ops) + return dev->dma_ops->mapping_error(dev, dma_addr); + return dma_mapping_error(dma_addr); +} + +/** + * ib_dma_map_single - Map a kernel virtual address to DMA address + * @dev: The device for which the dma_addr is to be created + * @cpu_addr: The kernel virtual address + * @size: The size of the region in bytes + * @direction: The direction of the DMA + */ +static inline u64 ib_dma_map_single(struct ib_device *dev, + void *cpu_addr, size_t size, + enum dma_data_direction direction) +{ + if (dev->dma_ops) + return dev->dma_ops->map_single(dev, cpu_addr, size, direction); + return dma_map_single(dev->dma_device, cpu_addr, size, direction); +} + +/** + * ib_dma_unmap_single - Destroy a mapping created by ib_dma_map_single() + * @dev: The device for which the DMA address was created + * @addr: The DMA address + * @size: The size of the region in bytes + * @direction: The direction of the DMA + */ +static inline void ib_dma_unmap_single(struct ib_device *dev, + u64 addr, size_t size, + enum dma_data_direction direction) +{ + if (dev->dma_ops) + dev->dma_ops->unmap_single(dev, addr, size, direction); + else + dma_unmap_single(dev->dma_device, addr, size, direction); +} + +/** + * ib_dma_map_page - Map a physical page to DMA address + * @dev: The device for which the dma_addr is to be created + * @page: The page to be mapped + * @offset: The offset within the page + * @size: The size of the region in bytes + * @direction: The direction of the DMA + */ +static inline u64 ib_dma_map_page(struct ib_device *dev, + struct page *page, + unsigned long offset, + size_t size, + enum dma_data_direction direction) +{ + if (dev->dma_ops) + return dev->dma_ops->map_page(dev, page, offset, size, direction); + return dma_map_page(dev->dma_device, page, offset, size, direction); +} + +/** + * ib_dma_unmap_page - Destroy a mapping created by ib_dma_map_page() + * @dev: The device for which the DMA address was created + * @addr: The DMA address + * @size: The size of the region in 
bytes + * @direction: The direction of the DMA + */ +static inline void ib_dma_unmap_page(struct ib_device *dev, + u64 addr, size_t size, + enum dma_data_direction direction) +{ + if (dev->dma_ops) + dev->dma_ops->unmap_page(dev, addr, size, direction); + else + dma_unmap_page(dev->dma_device, addr, size, direction); +} + +/** + * ib_dma_map_sg - Map a scatter/gather list to DMA addresses + * @dev: The device for which the DMA addresses are to be created + * @sg: The array of scatter/gather entries + * @nents: The number of scatter/gather entries + * @direction: The direction of the DMA + */ +static inline int ib_dma_map_sg(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction) +{ + if (dev->dma_ops) + return dev->dma_ops->map_sg(dev, sg, nents, direction); + return dma_map_sg(dev->dma_device, sg, nents, direction); +} + +/** + * ib_dma_unmap_sg - Unmap a scatter/gather list of DMA addresses + * @dev: The device for which the DMA addresses were created + * @sg: The array of scatter/gather entries + * @nents: The number of scatter/gather entries + * @direction: The direction of the DMA + */ +static inline void ib_dma_unmap_sg(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction) +{ + if (dev->dma_ops) + dev->dma_ops->unmap_sg(dev, sg, nents, direction); + else + dma_unmap_sg(dev->dma_device, sg, nents, direction); +} + +/** + * ib_sg_dma_address - Return the DMA address from a scatter/gather entry + * @dev: The device for which the DMA addresses were created + * @sg: The scatter/gather entry + */ +static inline u64 ib_sg_dma_address(struct ib_device *dev, + struct scatterlist *sg) +{ + if (dev->dma_ops) + return dev->dma_ops->dma_address(dev, sg); + return sg_dma_address(sg); +} + +/** + * ib_sg_dma_len - Return the DMA length from a scatter/gather entry + * @dev: The device for which the DMA addresses were created + * @sg: The scatter/gather entry + */ +static inline unsigned int ib_sg_dma_len(struct ib_device *dev, + struct scatterlist *sg) +{ + if (dev->dma_ops) + return dev->dma_ops->dma_len(dev, sg); + return sg_dma_len(sg); +} + +/** + * ib_dma_sync_single_for_cpu - Prepare DMA region to be accessed by CPU + * @dev: The device for which the DMA address was created + * @addr: The DMA address + * @size: The size of the region in bytes + * @dir: The direction of the DMA + */ +static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev, + u64 addr, + size_t size, + enum dma_data_direction dir) +{ + if (dev->dma_ops) + dev->dma_ops->sync_single_for_cpu(dev, addr, size, dir); + else + dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); +} + +/** + * ib_dma_sync_single_for_device - Prepare DMA region to be accessed by device + * @dev: The device for which the DMA address was created + * @addr: The DMA address + * @size: The size of the region in bytes + * @dir: The direction of the DMA + */ +static inline void ib_dma_sync_single_for_device(struct ib_device *dev, + u64 addr, + size_t size, + enum dma_data_direction dir) +{ + if (dev->dma_ops) + dev->dma_ops->sync_single_for_device(dev, addr, size, dir); + else + dma_sync_single_for_device(dev->dma_device, addr, size, dir); +} + +/** + * ib_dma_alloc_coherent - Allocate memory and map it for DMA + * @dev: The device for which the DMA address is requested + * @size: The size of the region to allocate in bytes + * @dma_handle: A pointer for returning the DMA address of the region + * @flag: memory allocator flags + */ +static inline void 
*ib_dma_alloc_coherent(struct ib_device *dev, + size_t size, + u64 *dma_handle, + gfp_t flag) +{ + if (dev->dma_ops) + return dev->dma_ops->alloc_coherent(dev, size, dma_handle, flag); + else { + dma_addr_t handle; + void *ret; + + ret = dma_alloc_coherent(dev->dma_device, size, &handle, flag); + *dma_handle = handle; + return ret; + } +} + +/** + * ib_dma_free_coherent - Free memory allocated by ib_dma_alloc_coherent() + * @dev: The device for which the DMA addresses were allocated + * @size: The size of the region + * @cpu_addr: the address returned by ib_dma_alloc_coherent() + * @dma_handle: the DMA address returned by ib_dma_alloc_coherent() + */ +static inline void ib_dma_free_coherent(struct ib_device *dev, + size_t size, void *cpu_addr, + u64 dma_handle) +{ + if (dev->dma_ops) + dev->dma_ops->free_coherent(dev, size, cpu_addr, dma_handle); + else + dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle); +} + +#endif + +/** + * ib_reg_phys_mr - Prepares a virtually addressed memory region for use + * by an HCA. + * @pd: The protection domain associated assigned to the registered region. + * @phys_buf_array: Specifies a list of physical buffers to use in the + * memory region. + * @num_phys_buf: Specifies the size of the phys_buf_array. + * @mr_access_flags: Specifies the memory access rights. + * @iova_start: The offset of the region's starting I/O virtual address. + */ +struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, + u64 *iova_start); + +/** + * ib_rereg_phys_mr - Modifies the attributes of an existing memory region. + * Conceptually, this call performs the functions deregister memory region + * followed by register physical memory region. Where possible, + * resources are reused instead of deallocated and reallocated. + * @mr: The memory region to modify. + * @mr_rereg_mask: A bit-mask used to indicate which of the following + * properties of the memory region are being modified. + * @pd: If %IB_MR_REREG_PD is set in mr_rereg_mask, this field specifies + * the new protection domain to associated with the memory region, + * otherwise, this parameter is ignored. + * @phys_buf_array: If %IB_MR_REREG_TRANS is set in mr_rereg_mask, this + * field specifies a list of physical buffers to use in the new + * translation, otherwise, this parameter is ignored. + * @num_phys_buf: If %IB_MR_REREG_TRANS is set in mr_rereg_mask, this + * field specifies the size of the phys_buf_array, otherwise, this + * parameter is ignored. + * @mr_access_flags: If %IB_MR_REREG_ACCESS is set in mr_rereg_mask, this + * field specifies the new memory access rights, otherwise, this + * parameter is ignored. + * @iova_start: The offset of the region's starting I/O virtual address. + */ +int ib_rereg_phys_mr(struct ib_mr *mr, + int mr_rereg_mask, + struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, + u64 *iova_start); + +/** + * ib_query_mr - Retrieves information about a specific memory region. + * @mr: The memory region to retrieve information about. + * @mr_attr: The attributes of the specified memory region. + */ +int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); + +/** + * ib_dereg_mr - Deregisters a memory region and removes it from the + * HCA translation table. + * @mr: The memory region to deregister. + */ +int ib_dereg_mr(struct ib_mr *mr); + +/** + * ib_alloc_mw - Allocates a memory window. + * @pd: The protection domain associated with the memory window. 
+ */ +struct ib_mw *ib_alloc_mw(struct ib_pd *pd); + +/** + * ib_bind_mw - Posts a work request to the send queue of the specified + * QP, which binds the memory window to the given address range and + * remote access attributes. + * @qp: QP to post the bind work request on. + * @mw: The memory window to bind. + * @mw_bind: Specifies information about the memory window, including + * its address range, remote access rights, and associated memory region. + */ +static inline int ib_bind_mw(struct ib_qp *qp, + struct ib_mw *mw, + struct ib_mw_bind *mw_bind) +{ + /* XXX reference counting in corresponding MR? */ + return mw->device->bind_mw ? + mw->device->bind_mw(qp, mw, mw_bind) : + -ENOSYS; +} + +/** + * ib_dealloc_mw - Deallocates a memory window. + * @mw: The memory window to deallocate. + */ +int ib_dealloc_mw(struct ib_mw *mw); + +/** + * ib_alloc_fmr - Allocates a unmapped fast memory region. + * @pd: The protection domain associated with the unmapped region. + * @mr_access_flags: Specifies the memory access rights. + * @fmr_attr: Attributes of the unmapped region. + * + * A fast memory region must be mapped before it can be used as part of + * a work request. + */ +struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd, + int mr_access_flags, + struct ib_fmr_attr *fmr_attr); + +/** + * ib_map_phys_fmr - Maps a list of physical pages to a fast memory region. + * @fmr: The fast memory region to associate with the pages. + * @page_list: An array of physical pages to map to the fast memory region. + * @list_len: The number of pages in page_list. + * @iova: The I/O virtual address to use with the mapped region. + */ +static inline int ib_map_phys_fmr(struct ib_fmr *fmr, + u64 *page_list, int list_len, + u64 iova) +{ + return fmr->device->map_phys_fmr(fmr, page_list, list_len, iova); +} + +/** + * ib_unmap_fmr - Removes the mapping from a list of fast memory regions. + * @fmr_list: A linked list of fast memory regions to unmap. + */ +int ib_unmap_fmr(struct list_head *fmr_list); + +/** + * ib_dealloc_fmr - Deallocates a fast memory region. + * @fmr: The fast memory region to deallocate. + */ +int ib_dealloc_fmr(struct ib_fmr *fmr); + +/** + * ib_attach_mcast - Attaches the specified QP to a multicast group. + * @qp: QP to attach to the multicast group. The QP must be type + * IB_QPT_UD. + * @gid: Multicast group GID. + * @lid: Multicast group LID in host byte order. + * + * In order to send and receive multicast packets, subnet + * administration must have created the multicast group and configured + * the fabric appropriately. The port associated with the specified + * QP must also be a member of the multicast group. + */ +int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); + +/** + * ib_detach_mcast - Detaches the specified QP from a multicast group. + * @qp: QP to detach from the multicast group. + * @gid: Multicast group GID. + * @lid: Multicast group LID in host byte order. + */ +int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); + +#endif /* IB_VERBS_H */ diff --git a/branches/ConnectX/hw/mlx4/kernel/inc/ib_verbs_ex.h b/branches/ConnectX/hw/mlx4/kernel/inc/ib_verbs_ex.h new file mode 100644 index 00000000..6ce8bc74 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/inc/ib_verbs_ex.h @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. 
+ * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: al.c 1611 2006-08-20 14:48:55Z sleybo $ + */ + +#pragma once + +typedef struct _FDO_DEVICE_DATA *PFDO_DEVICE_DATA; +struct ib_cq; + +/* extension for ib_device */ +struct ib_device_ex +{ + PFDO_DEVICE_DATA p_fdo; + int (*get_cached_gid)(struct ib_device *device, + u8 port_num, int index, union ib_gid *gid); + int (*find_cached_gid)(struct ib_device *device, + union ib_gid *gid, u8 *port_num, u16 *index); + int (*get_cached_pkey)(struct ib_device *device, + u8 port_num, int index, u16 *pkey); + int (*find_cached_pkey)(struct ib_device *device, + u8 port_num, u16 pkey, u16 *index); +}; + + +/* extension for ib_ucontext */ +typedef struct { + PVOID uva; + PMDL mdl; + PVOID kva; + int mapped; +} umap_t; + +struct ib_ucontext_ex +{ + cl_list_item_t list_item; // chain of user contexts + umap_t uar; + umap_t bf; + atomic_t usecnt; /* count all resources */ + // for tools support + struct mutex mutex; + PMDL p_mdl; + PVOID va; + int fw_if_open; +}; + +/* extension for ib_cq */ +struct ib_cq_ex +{ + void * ctx; /* IBAL CQ context */ +}; + +/* extension for ib_qp */ +struct ib_qp_ex +{ + void * ctx; /* IBAL QP context */ +}; + +/* extension for ib_srq */ +struct ib_srq_ex +{ + void * ctx; /* IBAL SRQ context */ +}; + +/* extension for ib_event */ +struct ib_event_ex +{ + uint64_t vendor_specific; +}; + + diff --git a/branches/ConnectX/hw/mlx4/kernel/inc/qp.h b/branches/ConnectX/hw/mlx4/kernel/inc/qp.h new file mode 100644 index 00000000..0320df57 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/inc/qp.h @@ -0,0 +1,293 @@ +/* + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX4_QP_H +#define MLX4_QP_H + +#include "device.h" + +#define MLX4_INVALID_LKEY 0x100 + +enum mlx4_qp_optpar { + MLX4_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0, + MLX4_QP_OPTPAR_RRE = 1 << 1, + MLX4_QP_OPTPAR_RAE = 1 << 2, + MLX4_QP_OPTPAR_RWE = 1 << 3, + MLX4_QP_OPTPAR_PKEY_INDEX = 1 << 4, + MLX4_QP_OPTPAR_Q_KEY = 1 << 5, + MLX4_QP_OPTPAR_RNR_TIMEOUT = 1 << 6, + MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH = 1 << 7, + MLX4_QP_OPTPAR_SRA_MAX = 1 << 8, + MLX4_QP_OPTPAR_RRA_MAX = 1 << 9, + MLX4_QP_OPTPAR_PM_STATE = 1 << 10, + MLX4_QP_OPTPAR_RETRY_COUNT = 1 << 12, + MLX4_QP_OPTPAR_RNR_RETRY = 1 << 13, + MLX4_QP_OPTPAR_ACK_TIMEOUT = 1 << 14, + MLX4_QP_OPTPAR_SCHED_QUEUE = 1 << 16 +}; + +enum mlx4_qp_state { + MLX4_QP_STATE_RST = 0, + MLX4_QP_STATE_INIT = 1, + MLX4_QP_STATE_RTR = 2, + MLX4_QP_STATE_RTS = 3, + MLX4_QP_STATE_SQER = 4, + MLX4_QP_STATE_SQD = 5, + MLX4_QP_STATE_ERR = 6, + MLX4_QP_STATE_SQ_DRAINING = 7, + MLX4_QP_NUM_STATE +}; + +enum { + MLX4_QP_ST_RC = 0x0, + MLX4_QP_ST_UC = 0x1, + MLX4_QP_ST_RD = 0x2, + MLX4_QP_ST_UD = 0x3, + MLX4_QP_ST_MLX = 0x7 +}; + +enum { + MLX4_QP_PM_MIGRATED = 0x3, + MLX4_QP_PM_ARMED = 0x0, + MLX4_QP_PM_REARM = 0x1 +}; + +enum { + /* params1 */ + MLX4_QP_BIT_SRE = 1 << 15, + MLX4_QP_BIT_SWE = 1 << 14, + MLX4_QP_BIT_SAE = 1 << 13, + /* params2 */ + MLX4_QP_BIT_RRE = 1 << 15, + MLX4_QP_BIT_RWE = 1 << 14, + MLX4_QP_BIT_RAE = 1 << 13, + MLX4_QP_BIT_RIC = 1 << 4, +}; + +struct mlx4_qp_path { + u8 fl; + u8 reserved1[2]; + u8 pkey_index; + u8 reserved2; + u8 grh_mylmc; + __be16 rlid; + u8 ackto; + u8 mgid_index; + u8 static_rate; + u8 hop_limit; + __be32 tclass_flowlabel; + u8 rgid[16]; + u8 sched_queue; + u8 snooper_flags; + u8 reserved3[2]; + u8 counter_index; + u8 reserved4[7]; +}; + +struct mlx4_qp_context { + __be32 flags; + __be32 pd; + u8 mtu_msgmax; + u8 rq_size_stride; + u8 sq_size_stride; + u8 rlkey; + __be32 usr_page; + __be32 local_qpn; + __be32 remote_qpn; + struct mlx4_qp_path pri_path; + struct mlx4_qp_path alt_path; + __be32 params1; + u32 reserved1; + __be32 next_send_psn; + __be32 cqn_send; + u32 reserved2[2]; + __be32 last_acked_psn; + __be32 ssn; + __be32 params2; + __be32 rnr_nextrecvpsn; + __be32 srcd; + __be32 cqn_recv; + __be64 db_rec_addr; + __be32 qkey; + __be32 srqn; + __be32 msn; + __be16 rq_wqe_counter; + __be16 sq_wqe_counter; + u32 reserved3[2]; + __be32 param3; + __be32 nummmcpeers_basemkey; + u8 log_page_size; + u8 reserved4[2]; + u8 mtt_base_addr_h; + __be32 mtt_base_addr_l; + u32 reserved5[10]; +}; + +enum { + MLX4_WQE_CTRL_FENCE = 1 << 6, + MLX4_WQE_CTRL_CQ_UPDATE = 3 << 2, + MLX4_WQE_CTRL_SOLICITED = 1 << 1, +}; + +struct mlx4_wqe_ctrl_seg { + __be32 owner_opcode; + u8 reserved2[3]; + u8 fence_size; + /* + * High 24 bits are SRC remote buffer; low 8 bits are flags: + * [7] SO (strong ordering) + * [5] TCP/UDP checksum + * 
[4] IP checksum + * [3:2] C (generate completion queue entry) + * [1] SE (solicited event) + */ + __be32 srcrb_flags; + /* + * imm is immediate data for send/RDMA write w/ immediate; + * also invalidation key for send with invalidate; input + * modifier for WQEs on CCQs. + */ + __be32 imm; +}; + +enum { + MLX4_WQE_MLX_VL15 = 1 << 17, + MLX4_WQE_MLX_SLR = 1 << 16 +}; + +struct mlx4_wqe_mlx_seg { + u8 owner; + u8 reserved1[2]; + u8 opcode; + u8 reserved2[3]; + u8 size; + /* + * [17] VL15 + * [16] SLR + * [15:12] static rate + * [11:8] SL + * [4] ICRC + * [3:2] C + * [0] FL (force loopback) + */ + __be32 flags; + __be16 rlid; + u16 reserved3; +}; + +struct mlx4_wqe_datagram_seg { + __be32 av[8]; + __be32 dqpn; + __be32 qkey; + __be32 reservd[2]; +}; + +struct mlx4_wqe_bind_seg { + __be32 flags1; + __be32 flags2; + __be32 new_rkey; + __be32 lkey; + __be64 addr; + __be64 length; +}; + +struct mlx4_wqe_fmr_seg { + __be32 flags; + __be32 mem_key; + __be64 buf_list; + __be64 start_addr; + __be64 reg_len; + __be32 offset; + __be32 page_size; + u32 reserved[2]; +}; + +struct mlx4_wqe_fmr_ext_seg { + u8 flags; + u8 reserved; + __be16 app_mask; + __be16 wire_app_tag; + __be16 mem_app_tag; + __be32 wire_ref_tag_base; + __be32 mem_ref_tag_base; +}; + +struct mlx4_wqe_local_inval_seg { + u8 flags; + u8 reserved1[3]; + __be32 mem_key; + u8 reserved2[3]; + u8 guest_id; + __be64 pa; +}; + +struct mlx4_wqe_raddr_seg { + __be64 raddr; + __be32 rkey; + u32 reserved; +}; + +struct mlx4_wqe_atomic_seg { + __be64 swap_add; + __be64 compare; +}; + +struct mlx4_wqe_data_seg { + __be32 byte_count; + __be32 lkey; + __be64 addr; +}; + +enum { + MLX4_INLINE_ALIGN = 64, +}; + +struct mlx4_wqe_inline_seg { + __be32 byte_count; +}; + +int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, + enum mlx4_qp_state cur_state, enum mlx4_qp_state new_state, + struct mlx4_qp_context *context, enum mlx4_qp_optpar optpar, + int sqd_event, struct mlx4_qp *qp); + +int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp, + struct mlx4_qp_context *context); + +static inline struct mlx4_qp *__mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn) +{ + return radix_tree_lookup(&dev->qp_table_tree, qpn & (dev->caps.num_qps - 1)); +} + +void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp); + +#endif /* MLX4_QP_H */ diff --git a/branches/ConnectX/hw/mlx4/kernel/inc/srq.h b/branches/ConnectX/hw/mlx4/kernel/inc/srq.h new file mode 100644 index 00000000..799a0697 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/inc/srq.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX4_SRQ_H +#define MLX4_SRQ_H + +struct mlx4_wqe_srq_next_seg { + u16 reserved1; + __be16 next_wqe_index; + u32 reserved2[3]; +}; + +#endif /* MLX4_SRQ_H */ diff --git a/branches/ConnectX/hw/mlx4/kernel/iobuf.h b/branches/ConnectX/hw/mlx4/kernel/iobuf.h new file mode 100644 index 00000000..44002497 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/iobuf.h @@ -0,0 +1,53 @@ +#pragma once + +typedef struct { + u64 va; /* virtual address of the buffer */ + u64 size; /* size in bytes of the buffer */ + LIST_ENTRY seg_que; + u32 nr_pages; + int is_user; + int seg_num; + int is_cashed; +} iobuf_t; + +/* iterator for getting segments of tpt */ +typedef struct _iobuf_iter { + void * seg_p; /* the item from where to take the next translations */ + unsigned int pfn_ix; /* index from where to take the next translation */ +} iobuf_iter_t; + +void iobuf_deregister_with_cash(IN iobuf_t *iobuf_p); + +void iobuf_deregister(IN iobuf_t *iobuf_p); + +void iobuf_init( + IN u64 va, + IN u64 size, + IN int is_user, + IN OUT iobuf_t *iobuf_p); + +int iobuf_register_with_cash( + IN u64 vaddr, + IN u64 size, + IN int is_user, + IN OUT enum ib_access_flags *acc_p, + IN OUT iobuf_t *iobuf_p); + +int iobuf_register( + IN u64 va, + IN u64 size, + IN int is_user, + IN ib_access_t acc, + IN OUT iobuf_t *iobuf_p); + +void iobuf_iter_init( + IN iobuf_t *iobuf_p, + IN OUT iobuf_iter_t *iterator_p); + +uint32_t iobuf_get_tpt_seg( + IN iobuf_t *iobuf_p, + IN OUT iobuf_iter_t *iterator_p, + IN uint32_t n_pages_in, + IN OUT uint64_t *page_tbl_p ); + + diff --git a/branches/ConnectX/hw/mlx4/kernel/l2w.h b/branches/ConnectX/hw/mlx4/kernel/l2w.h new file mode 100644 index 00000000..255dfe7b --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/l2w.h @@ -0,0 +1,311 @@ +#pragma once + +#ifndef L2W_H +#define L2W_H + +//////////////////////////////////////////////////////// +// +// GENERAL INCLUDES +// +//////////////////////////////////////////////////////// + +// OS +#include +#include +#include +#include +#include +#define NTSTRSAFE_LIB +#include + +// complib +#include +#include +#include + +// mlx4 +#include "vc.h" + + +//////////////////////////////////////////////////////// +// +// LITERALS +// +//////////////////////////////////////////////////////// + +#define BITS_PER_LONG 32 +#define N_BARS 3 +#define HZ 1000000 /* 1 sec in usecs */ +#define EOPNOTSUPP 95 + + +//////////////////////////////////////////////////////// +// +// SUBSTITUTIONS +// +//////////////////////////////////////////////////////// + +#define BUG_ON(exp) ASSERT(!(exp)) /* in Linux follows here panic() !*/ +#define WARN_ON(exp) ASSERT(!(exp)) /* in Linux follows here panic() !*/ +#define snprintf _snprintf +#define printk DbgPrint +#define KERN_ERR "err:" +#define KERN_WARNING "warn:" +#define KERN_DEBUG "dbg:" + +// memory barriers +#define wmb KeMemoryBarrier +#define rmb KeMemoryBarrier +#define mb KeMemoryBarrier +// TODO: can we make it empty ? 
I saw in Linux, it is an empty macro for x86 & x64 +#define mmiowb KeMemoryBarrier + +// linker +#define EXPORT_SYMBOL_GPL(a) + +// gcc compiler attributes +#define __devinit +#define __devinitdata +#define __init +#define __exit +#define __force +#define __iomem +#define __attribute_const__ +#define likely(x) (x) +#define unlikely(x) (x) +#define __attribute__(a) +#define __bitwise + +// container_of +#define container_of CONTAINING_RECORD + +// inline +#define inline __inline + +// new Linux event mechanism +#define complete(a) wake_up(a) + +// convert +#define __constant_htons CL_HTON16 +#define __constant_cpu_to_be32 CL_HTON32 + +// various +#define __always_inline inline + +//////////////////////////////////////////////////////// +// +// TYPES +// +//////////////////////////////////////////////////////// + +// basic types +typedef unsigned char u8, __u8; +typedef unsigned short int u16, __u16; +typedef unsigned int u32, __u32; +typedef unsigned __int64 u64, __u64; +typedef char s8, __s8; +typedef short int s16, __s16; +typedef int s32, __s32; +typedef __int64 s64, __s64; + +// inherited +typedef u16 __le16; +typedef u16 __be16; +typedef u32 __le32; +typedef u32 __be32; +typedef u64 __le64; +typedef u64 __be64; +typedef u64 io_addr_t; + +// dummy function +typedef void (*MT_EMPTY_FUNC)(); + +// PCI BAR descriptor +typedef enum _hca_bar_type +{ + HCA_BAR_TYPE_HCR, + HCA_BAR_TYPE_UAR, + HCA_BAR_TYPE_DDR, + HCA_BAR_TYPE_MAX + +} hca_bar_type_t; + + +typedef struct _hca_bar +{ + uint64_t phys; + void *virt; + SIZE_T size; + +} hca_bar_t; + + +// interface structure between Upper and Low Layers of the driver +struct pci_dev +{ + // driver: OS/platform resources + BUS_INTERFACE_STANDARD bus_pci_ifc; + PCI_COMMON_CONFIG pci_cfg_space; + uplink_info_t uplink_info; + // driver: card resources + hca_bar_t bar[N_BARS]; + CM_PARTIAL_RESOURCE_DESCRIPTOR int_info; /* HCA interrupt resources */ + // driver: various objects and info + USHORT ven_id; + USHORT dev_id; + DMA_ADAPTER * p_dma_adapter; /* HCA adapter object */ + DEVICE_OBJECT * p_self_do; /* mlx4 FDO */ + // mlx4_net: various objects and info + struct mlx4_dev * dev; + volatile long dpc_lock; +#ifdef USE_WDM_INTERRUPTS + PKINTERRUPT int_obj; /* HCA interrupt object */ + KSPIN_LOCK isr_lock; /* lock for the ISR */ +#endif + // mlx4_ib: various objects and info + struct ib_device * ib_dev; +}; + +/* DPC */ +typedef void (*dpc_t)( struct _KDPC *, PVOID, PVOID, PVOID ); + + +//////////////////////////////////////////////////////// +// +// MACROS +// +//////////////////////////////////////////////////////// + +// conversions +#define swab32(a) _byteswap_ulong((ULONG)(a)) +#define cpu_to_be16(a) _byteswap_ushort((USHORT)(a)) +#define be16_to_cpu(a) _byteswap_ushort((USHORT)(a)) +#define cpu_to_be32(a) _byteswap_ulong((ULONG)(a)) +#define be32_to_cpu(a) _byteswap_ulong((ULONG)(a)) +#define cpu_to_be64(a) _byteswap_uint64((UINT64)(a)) +#define be64_to_cpu(a) _byteswap_uint64((UINT64)(a)) +#define be64_to_cpup(p) _byteswap_uint64(*(PUINT64)(p)) +#define be32_to_cpup(p) _byteswap_ulong(*(PULONG)(p)) +#define be16_to_cpup(p) _byteswap_ushort(*(PUSHORT)(p)) + +// ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +// ALIGN +#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1)) +#define PTR_ALIGN(size) (((size) + sizeof(void*) - 1) & ~(sizeof(void*) - 1)) + +// there is a bug in Microsoft compiler, that when _byteswap_uint64() gets an expression +// it executes the expression but doesn't swap tte dwords +// So, there's a workaround +#ifdef 
BYTESWAP_UINT64_BUG_FIXED
+#define CPU_2_BE64_PREP
+#define CPU_2_BE64(x) cl_hton64(x)
+#else
+#define CPU_2_BE64_PREP unsigned __int64 __tmp__
+#define CPU_2_BE64(x) ( __tmp__ = x, cl_hton64(__tmp__) )
+#endif
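+
+/*
+ * Editor's usage sketch (illustration only): CPU_2_BE64_PREP declares the
+ * temporary that the workaround above needs, so the two macros are used
+ * together; CPU_2_BE64() may then safely be handed a full expression.  The
+ * function and parameter names below are placeholders.
+ */
+static inline void sample_write_be64(__be64 *p_dst, u64 host_val)
+{
+    CPU_2_BE64_PREP;
+
+    *p_dst = CPU_2_BE64(host_val + 1);    /* expression argument is swapped correctly */
+}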
+
+#define ERR_PTR(error) ((void*)(LONG_PTR)(error))
+#define PTR_ERR(ptr) ((long)(LONG_PTR)(void*)(ptr))
+//TODO: there are 2 assumptions here:
+// - pointer can't be too big (around -1)
+// - error can't be bigger than 1000
+#define IS_ERR(ptr) ((ULONG_PTR)ptr > (ULONG_PTR)-1000L)
+
+#define BITS_TO_LONGS(bits) \
+    (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
+
+#ifndef ETIMEDOUT
+#define ETIMEDOUT (110)
+#endif
+
+#ifdef PAGE_ALIGN
+#undef PAGE_ALIGN
+#define PAGE_ALIGN(Va) ((u64)((ULONG_PTR)(Va) & ~(PAGE_SIZE - 1)))
+#endif
+
+#define NEXT_PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK)
+
+/* typed minimum */
+#define min_t(type,x,y) ((type)(x) < (type)(y) ? (type)(x) : (type)(y))
+
+#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
+
+#define EXPORT_SYMBOL(name)
+#ifndef USE_WDM_INTERRUPTS
+#define free_irq(pdev)
+#endif
+
+static inline NTSTATUS errno_to_ntstatus(int err)
+{
+#define MAP_ERR(err,ntstatus) case err: status = ntstatus; break
+    NTSTATUS status;
+
+    if (!err)
+        return STATUS_SUCCESS;
+
+    if (err < 0)
+        err = -err;
+    switch (err) {
+        MAP_ERR( ENOENT, STATUS_NOT_FOUND );
+        MAP_ERR( EAGAIN, STATUS_DEVICE_BUSY );
+        MAP_ERR( ENOMEM, STATUS_NO_MEMORY );
+        MAP_ERR( EACCES, STATUS_ACCESS_DENIED );
+        MAP_ERR( EFAULT, STATUS_DRIVER_INTERNAL_ERROR );
+        MAP_ERR( EBUSY, STATUS_INSUFFICIENT_RESOURCES );
+        MAP_ERR( ENODEV, STATUS_NOT_SUPPORTED );
+        MAP_ERR( EINVAL, STATUS_INVALID_PARAMETER );
+        MAP_ERR( ENOSYS, STATUS_NOT_SUPPORTED );
+        default:
+            status = STATUS_UNSUCCESSFUL;
+            break;
+    }
+    return status;
+}
+
+
+////////////////////////////////////////////////////////
+//
+// PROTOTYPES
+//
+////////////////////////////////////////////////////////
+
+SIZE_T strlcpy(char *dest, const void *src, SIZE_T size);
+int core_init();
+void core_cleanup();
+
+
+////////////////////////////////////////////////////////
+//
+// SPECIFIC INCLUDES
+//
+////////////////////////////////////////////////////////
+
+struct mlx4_dev;
+struct mlx4_priv;
+
+
+#include "mlx4_debug.h"
+#include
+#include
+#include
+#include "l2w_debug.h"
+#include
+#include
+#include
+#include
+#include
+#include "l2w_radix.h"
+#include
+#include
+#include
+
+#include "device.h"
+
+static inline int mlx4_is_livefish(struct mlx4_dev *dev)
+{
+    return dev->flags & MLX4_FLAG_LIVEFISH;
+}
+
+#endif
diff --git a/branches/ConnectX/hw/mlx4/kernel/l2w_atomic.h b/branches/ConnectX/hw/mlx4/kernel/l2w_atomic.h
new file mode 100644
index 00000000..51bd7779
--- /dev/null
+++ b/branches/ConnectX/hw/mlx4/kernel/l2w_atomic.h
@@ -0,0 +1,47 @@
+#pragma once
+
+#include "complib/cl_atomic.h"
+
+typedef volatile __int32 atomic_t; /* as atomic32_t */
+
+#define atomic_inc cl_atomic_inc
+#define atomic_dec cl_atomic_dec
+
+static inline atomic_t atomic_read(atomic_t *pval)
+{
+    return *pval;
+}
+
+static inline void atomic_set(atomic_t *pval, long val)
+{
+    *pval = (__int32)val;
+}
+
+/**
+* atomic_inc_and_test - increment and test
+* pval: pointer of type atomic_t
+*
+* Atomically increments pval by 1 and
+* returns true if the result is 0, or false for all other
+* cases.
+*/
+static inline int
+atomic_inc_and_test(atomic_t *pval)
+{
+    return cl_atomic_inc(pval) == 0;
+}
+
+/**
+* atomic_dec_and_test - decrement and test
+* pval: pointer of type atomic_t
+*
+* Atomically decrements pval by 1 and
+* returns true if the result is 0, or false for all other
+* cases.
+*/
+static inline int
+atomic_dec_and_test(atomic_t *pval)
+{
+    return cl_atomic_dec(pval) == 0;
+}
+
diff --git a/branches/ConnectX/hw/mlx4/kernel/l2w_bit.h b/branches/ConnectX/hw/mlx4/kernel/l2w_bit.h
new file mode 100644
index 00000000..ad467ae6
--- /dev/null
+++ b/branches/ConnectX/hw/mlx4/kernel/l2w_bit.h
@@ -0,0 +1,186 @@
+#pragma once
+
+// Nth element of the table contains the index of the first set bit of N; 8 - for N=0
+extern char g_set_bit_tbl[256];
+// Nth element of the table contains the index of the first cleared bit of N; 8 - for N=0
+extern char g_clr_bit_tbl[256];
+
+static inline int fls(int x)
+{
+    int r = 32;
+
+    if (!x)
+        return 0;
+    if (!(x & 0xffff0000u)) {
+        x <<= 16;
+        r -= 16;
+    }
+    if (!(x & 0xff000000u)) {
+        x <<= 8;
+        r -= 8;
+    }
+    if (!(x & 0xf0000000u)) {
+        x <<= 4;
+        r -= 4;
+    }
+    if (!(x & 0xc0000000u)) {
+        x <<= 2;
+        r -= 2;
+    }
+    if (!(x & 0x80000000u)) {
+        x <<= 1;
+        r -= 1;
+    }
+    return r;
+}
+
+/**
+* _ffs_raw - find the first one bit in a word
+* @addr: The address to start the search at
+* @offset: The bitnumber to start searching at
+*
+* returns: 0 - if not found or N+1, if found Nth bit
+*/
+static __inline int _ffs_raw(const unsigned long *addr, int offset)
+{
+    //TODO: not an effective code - is better in Assembler
+    int mask;
+    int rbc;
+    int ix;
+    if (!*addr) return 0;
+    mask = 1 << offset;
+    rbc = BITS_PER_LONG - offset;
+    for (ix=0; ix<rbc; ix++, mask<<=1) {
+        if (*addr & mask)
+            return offset + ix + 1;
+    }
+    return 0;
+}
+
+/**
+* find_next_zero_bit - find the next zero bit in a bitmap
+* @addr: The address to base the search on
+* @bits_size: The bitmap size in bits
+* @offset: The bitnumber to start searching at
+*/
+static inline int find_next_zero_bit(const unsigned long *addr, int bits_size, int offset)
+{
+    int res;
+    int ix = offset & 0x1f;
+    int set = offset & ~0x1f;
+    const unsigned long *p = addr + (offset >> 5);
+
+    // search in the first word while we are in the middle
+    if (ix) {
+        res = _ffz_raw(p, ix);
+        if (res)
+            return set + res - 1;
+        ++p;
+        set += BITS_PER_LONG;
+    }
+
+    // search the rest of the bitmap
+    res = find_first_zero_bit(p, bits_size - (unsigned)(32 * (p - addr)));
+    return res + set;
+}
+
+/* The function works only for 32-bit values (not as in Linux) */
+/* on val=0 will return '-1' */
+static inline int ilog2(u32 val)
+{
+    ASSERT(val);
+    return fls(val) - 1;
+}
+
diff --git a/branches/ConnectX/hw/mlx4/kernel/l2w_bitmap.h b/branches/ConnectX/hw/mlx4/kernel/l2w_bitmap.h
new file mode 100644
index 00000000..36e6b417
--- /dev/null
+++ b/branches/ConnectX/hw/mlx4/kernel/l2w_bitmap.h
@@ -0,0 +1,112 @@
+#pragma once
+
+#define DECLARE_BITMAP(name,bits) \
+    unsigned long name[BITS_TO_LONGS(bits)]
+
+/**
+* atomic_set_bit - Atomically set a bit in memory
+* @nr: the bit to set
+* @addr: the address to start counting from
+*
+* This function is atomic and may not be reordered. See __set_bit()
+* if you do not require the atomic guarantees.
+*
+* Note: there are no guarantees that this function will not be reordered
+* on non x86 architectures, so if you are writing portable code,
+* make sure not to rely on its reordering guarantees.
+*
+* Note that @nr may be almost arbitrarily large; this function is not
+* restricted to acting on a single-word quantity.
+*/
+static inline unsigned long atomic_set_bit(int nr, volatile long * addr)
+{
+    return InterlockedOr( addr, (1 << nr) );
+}
+
+/**
+* atomic_clear_bit - Clears a bit in memory
+* @nr: Bit to clear
+* @addr: Address to start counting from
+*
+* clear_bit() is atomic and may not be reordered.
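+
+/*
+ * Editor's worked example (illustration only): fls()/ilog2() from l2w_bit.h
+ * above are typically used for sizing power-of-two queues, e.g. rounding a
+ * requested depth of 200 up to 256 and taking its log (8).  The function
+ * name below is a placeholder.
+ */
+static inline u32 sample_log_num_entries(u32 requested)
+{
+    u32 n = (requested <= 1) ? 1 : (1U << fls(requested - 1));    /* round up to a power of two */
+
+    return (u32)ilog2(n);    /* log2 of a power of two, e.g. ilog2(256) == 8 */
+}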
However, it does +* not contain a memory barrier, so if it is used for locking purposes, +* you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() +* in order to ensure changes are visible on other processors. +*/ +static inline unsigned long atomic_clear_bit(int nr, volatile long * addr) +{ + return InterlockedAnd( addr, ~(1 << nr) ); +} + +static inline int set_bit(int nr,unsigned long * addr) +{ + addr += nr >> 5; + return atomic_set_bit( nr & 0x1f, (volatile long *)addr ); +} + +static inline int clear_bit(int nr, unsigned long * addr) +{ + addr += nr >> 5; + return atomic_clear_bit( nr & 0x1f, (volatile long *)addr ); +} + +static inline int test_bit(int nr, const unsigned long * addr) +{ + int mask; + + addr += nr >> 5; + mask = 1 << (nr & 0x1f); + return ((mask & *addr) != 0); +} + + +/** +* bitmap_zero - clear the bitmap +* @dst: the bitmap address +* @nbits: the bitmap size in bits +* +*/ +static inline void bitmap_zero(unsigned long *dst, int nbits) +{ + if (nbits <= BITS_PER_LONG) + *dst = 0UL; + else { + int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); + RtlZeroMemory(dst, len); + } +} + +#define BITMAP_LAST_WORD_MASK(nbits) \ + ( ((nbits) % BITS_PER_LONG) ? (1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL ) + +int __bitmap_full(const unsigned long *bitmap, int bits); + +static inline int bitmap_full(const unsigned long *src, int nbits) +{ + if (nbits <= BITS_PER_LONG) + return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits)); + else + return __bitmap_full(src, nbits); +} + +int __bitmap_empty(const unsigned long *bitmap, int bits); + +static inline int bitmap_empty(const unsigned long *src, int nbits) +{ + if (nbits <= BITS_PER_LONG) + return ! (*src & BITMAP_LAST_WORD_MASK(nbits)); + else + return __bitmap_empty(src, nbits); +} + +static inline void bitmap_fill(unsigned long *dst, int nbits) +{ + size_t nlongs = BITS_TO_LONGS(nbits); + if (nlongs > 1) { + int len = (int)((nlongs - 1) * sizeof(unsigned long)); + memset(dst, 0xff, len); + } + dst[nlongs - 1] = BITMAP_LAST_WORD_MASK(nbits); +} + + diff --git a/branches/ConnectX/hw/mlx4/kernel/l2w_debug.h b/branches/ConnectX/hw/mlx4/kernel/l2w_debug.h new file mode 100644 index 00000000..022f28ae --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/l2w_debug.h @@ -0,0 +1,45 @@ +#pragma once + +VOID +WriteEventLogEntryStr( + PVOID pi_pIoObject, + ULONG pi_ErrorCode, + ULONG pi_UniqueErrorCode, + ULONG pi_FinalStatus, + PWCHAR pi_InsertionStr, + ULONG pi_nDataItems, + ... + ); + +void +mlx4_err( + IN struct mlx4_dev * mdev, + IN char* format, + ... + ); + +void +mlx4_dbg( + IN struct mlx4_dev * mdev, + IN char* format, + ... + ); + +VOID +dev_err( + IN struct mlx4_dev ** mdev, + IN char* format, + ... + ); + +VOID +dev_info( + IN struct mlx4_dev ** p_mdev, + IN char* format, + ... + ); + +#define mlx4_warn mlx4_err +#define mlx4_info mlx4_dbg +#define dev_warn dev_err + diff --git a/branches/ConnectX/hw/mlx4/kernel/l2w_list.h b/branches/ConnectX/hw/mlx4/kernel/l2w_list.h new file mode 100644 index 00000000..a9ffcc57 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/l2w_list.h @@ -0,0 +1,194 @@ + +// taken from list.h + +/* + * These are non-NULL pointers that will result in page faults + * under normal circumstances, used to verify that nobody uses + * non-initialized list entries. + */ +#define LIST_POISON1 ((void *) 0x00100100) +#define LIST_POISON2 ((void *) 0x00200200) + +/* +* Simple doubly linked list implementation. 
+* +* Some of the internal functions ("__xxx") are useful when +* manipulating whole lists rather than single entries, as +* sometimes we already know the next/prev entries and we can +* generate better code by using them directly rather than +* using the generic single-entry routines. +*/ + +struct list_head { + struct list_head *next, *prev; +}; + +#define LIST_HEAD_INIT(name) { &(name), &(name) } + +#define LIST_HEAD(name) \ + struct list_head name = LIST_HEAD_INIT(name) + +#define INIT_LIST_HEAD(ptr) \ + (ptr)->next = (ptr); (ptr)->prev = (ptr) + + +/* +* Insert a new entry between two known consecutive entries. +* +* This is only for internal list manipulation where we know +* the prev/next entries already! +*/ +static inline void __list_add(struct list_head *new, + struct list_head *prev, + struct list_head *next) +{ + next->prev = new; + new->next = next; + new->prev = prev; + prev->next = new; +} + +/** +* list_add - add a new entry +* @new: new entry to be added +* @head: list head to add it after +* +* Insert a new entry after the specified head. +* This is good for implementing stacks. +*/ +static inline void list_add(struct list_head *new, struct list_head *head) +{ + __list_add(new, head, head->next); +} + +/** +* list_add_tail - add a new entry +* @new: new entry to be added +* @head: list head to add it before +* +* Insert a new entry before the specified head. +* This is useful for implementing queues. +*/ +static inline void list_add_tail(struct list_head *new, struct list_head *head) +{ + __list_add(new, head->prev, head); +} + + /* + * Delete a list entry by making the prev/next entries + * point to each other. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static inline void __list_del(struct list_head * prev, struct list_head * next) +{ + next->prev = prev; + prev->next = next; +} + + /** + * list_del - deletes entry from list. + * @entry: the element to delete from the list. + * Note: list_empty on entry does not return true after this, the entry is + * in an undefined state. + */ +static inline void list_del(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + entry->next = LIST_POISON1; + entry->prev = LIST_POISON2; +} + +/** +* list_empty - tests whether a list is empty +* @head: the list to test. +*/ +static inline int list_empty(const struct list_head *head) +{ + return head->next == head; +} + + static inline void __list_splice(struct list_head *list, + struct list_head *head) + { + struct list_head *first = list->next; + struct list_head *last = list->prev; + struct list_head *at = head->next; + + first->prev = head; + head->next = first; + + last->next = at; + at->prev = last; + } + + /** + * list_splice_init - join two lists and reinitialise the emptied list. + * @list: the new list to add. + * @head: the place to add it in the first list. + * + * The list at @list is reinitialised + */ + static inline void list_splice_init(struct list_head *list, + struct list_head *head) + { + if (!list_empty(list)) { + __list_splice(list, head); + INIT_LIST_HEAD(list); + } + } + + /** + * list_entry - get the struct for this entry + * @ptr: the &struct list_head pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_struct within the struct. 
+ */ +#define list_entry(ptr, type, member) \ + container_of(ptr, type, member) + +//leo: macro changed out of unportable operator typeof +/** +* list_for_each_entry - iterate over list of given type +* @pos: the type * to use as a loop counter. +* @head: the head for your list. +* @member: the name of the list_struct within the struct. +* @type: typeof(*pos) +*/ +#define list_for_each_entry(pos, head, member,type) \ + for (pos = list_entry((head)->next, type, member); \ + &pos->member != (head); \ + pos = list_entry(pos->member.next, type, member)) + + +//leo: macro changed out of unportable operator typeof +/** +* list_for_each_entry_reverse - iterate backwards over list of given type. +* @pos: the type * to use as a loop counter. +* @head: the head for your list. +* @member: the name of the list_struct within the struct. +* @type: typeof(*pos) +*/ +#define list_for_each_entry_reverse(pos, head, member,type) \ + for (pos = list_entry((head)->prev, type, member); \ + &pos->member != (head); \ + pos = list_entry(pos->member.prev, type, member)) + + +//leo: macro changed out of unportable operator typeof +/** +* list_for_each_entry_safe - iterate over list of given type safe against removal of list entry +* @pos: the type * to use as a loop counter. +* @n: another type * to use as temporary storage +* @head: the head for your list. +* @member: the name of the list_struct within the struct. +* @type: typeof(*pos) +* @type_n: typeof(*n) +*/ +#define list_for_each_entry_safe(pos, n, head, member,type,type_n) \ + for (pos = list_entry((head)->next, type, member), \ + n = list_entry(pos->member.next, type, member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.next, type_n, member)) + diff --git a/branches/ConnectX/hw/mlx4/kernel/l2w_memory.h b/branches/ConnectX/hw/mlx4/kernel/l2w_memory.h new file mode 100644 index 00000000..b5e8b27f --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/l2w_memory.h @@ -0,0 +1,332 @@ +#pragma once + +#include "iobuf.h" + +//////////////////////////////////////////////////////// +// +// CONSTANTS +// +//////////////////////////////////////////////////////// + +#define MT_TAG_ATOMIC 'MOTA' +#define MT_TAG_KERNEL 'LNRK' +#define MT_TAG_HIGH 'HGIH' +#define MT_TAG_PCIPOOL 'PICP' +#define MT_TAG_IOMAP 'PAMI' + +//////////////////////////////////////////////////////// +// +// SUBSTITUTIONS +// +//////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////// +// +// MACROS +// +//////////////////////////////////////////////////////// + +#define PAGE_MASK (~(PAGE_SIZE-1)) + + + //////////////////////////////////////////////////////// + // + // Helper functions + // + //////////////////////////////////////////////////////// + +// returns log of number of pages, i.e +// for size <= 4096 ==> 0 +// for size <= 8192 ==> 1 +static inline int get_order(unsigned long size) +{ + int order; + + size = (size-1) >> (PAGE_SHIFT-1); + order = -1; + do { + size >>= 1; + order++; + } while (size); + return order; +} + +static inline unsigned long roundup_pow_of_two(unsigned long x) +{ + return (1UL << fls(x - 1)); +} + + + +//////////////////////////////////////////////////////// +// +// SYSTEM MEMORY +// +//////////////////////////////////////////////////////// + +typedef enum _gfp { + __GFP_NOWARN = 0, /* Suppress page allocation failure warning */ + __GFP_HIGHMEM = 0, /* high memory */ + GFP_ATOMIC = 1, /* can't wait (i.e. 
DPC or higher) */ + GFP_KERNEL = 2, /* can wait (npaged) */ + GFP_HIGHUSER = 4 /* GFP_KERNEL, that can be in HIGH memory */ +} +gfp_t; + +struct vm_area_struct { + void * ptr; +}; + +static inline void * kmalloc( SIZE_T bsize, gfp_t gfp_mask) +{ + void *ptr; + ASSERT( KeGetCurrentIrql() <= DISPATCH_LEVEL ); + switch (gfp_mask) { + case GFP_ATOMIC: + ptr = ExAllocatePoolWithTag( NonPagedPool, bsize, MT_TAG_ATOMIC ); + break; + case GFP_KERNEL: + ptr = ExAllocatePoolWithTag( NonPagedPool, bsize, MT_TAG_KERNEL ); + break; + case GFP_HIGHUSER: + ptr = ExAllocatePoolWithTag( NonPagedPool, bsize, MT_TAG_HIGH ); + break; + default: + DbgPrint("kmalloc: unsupported flag %d\n", gfp_mask); + ptr = NULL; + break; + } + return ptr; +} + +static inline void * kzalloc( SIZE_T bsize, gfp_t gfp_mask) +{ + void* va = kmalloc(bsize, gfp_mask); + if (va) + RtlZeroMemory(va, bsize); + return va; +} + +static inline void *kcalloc(size_t n, size_t size, gfp_t flags) +{ + if (n != 0 && size > ULONG_MAX / n) + return NULL; + return kzalloc(n * size, flags); +} + +static inline void kfree (const void *pobj) +{ + ASSERT( KeGetCurrentIrql() <= DISPATCH_LEVEL ); + if (pobj) + ExFreePool((void *)pobj); +} + +#define get_zeroed_page(mask) kzalloc(PAGE_SIZE, mask) +#define free_page(ptr) kfree(ptr) + + +//////////////////////////////////////////////////////// +// +// IO SPACE <==> SYSTEM MEMORY +// +//////////////////////////////////////////////////////// + +/** +* ioremap - map bus memory into CPU space +* @addr: bus address of the memory +* @size: size of the resource to map +* +* ioremap performs a platform specific sequence of operations to +* make bus memory CPU accessible via the readb/readw/readl/writeb/ +* writew/writel functions and the other mmio helpers. The returned +* address is not guaranteed to be usable directly as a virtual +* address. 
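
/*
 * Allocation sketch for the kmalloc/kzalloc/kcalloc wrappers above
 * (illustration only: demo_alloc_usage() is hypothetical).  All GFP flags
 * map to NonPagedPool here and only select the pool tag, so callers must
 * be at IRQL <= DISPATCH_LEVEL in every case.
 */
static inline void demo_alloc_usage(void)
{
	struct demo_obj { int a; int b; } *p;
	void *tbl;

	p = kzalloc(sizeof *p, GFP_KERNEL);		/* zeroed single object */
	tbl = kcalloc(16, sizeof(u32), GFP_ATOMIC);	/* zeroed, overflow-checked array */

	ASSERT(get_order(PAGE_SIZE) == 0);		/* 1 page  -> order 0 */
	ASSERT(get_order(PAGE_SIZE + 1) == 1);		/* 2 pages -> order 1 */

	kfree(tbl);					/* kfree(NULL) is a no-op */
	kfree(p);
}
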
+*/ +static inline void *ioremap(io_addr_t addr, SIZE_T size) +{ + PHYSICAL_ADDRESS pa; + void *va; + + ASSERT( KeGetCurrentIrql() <= DISPATCH_LEVEL ); + pa.QuadPart = addr; + va = MmMapIoSpace( pa, size, MmNonCached ); + return va; +} + +static inline void iounmap(void *va, SIZE_T size) +{ + MmUnmapIoSpace( va, size ); +} + + +//////////////////////////////////////////////////////// +// +// DMA SUPPORT +// +//////////////////////////////////////////////////////// + +enum dma_data_direction { + PCI_DMA_BIDIRECTIONAL, + PCI_DMA_TODEVICE, + DMA_TO_DEVICE = PCI_DMA_TODEVICE +}; + +#define dma_get_cache_alignment (int)KeGetRecommendedSharedDataAlignment + +// wrapper to DMA address +typedef struct _dma_addr +{ + // TODO: in some cases it is still physical address today + io_addr_t da; /* logical (device) address */ + void * va; /* kernel virtual address */ + unsigned long sz; /* buffer size */ +} dma_addr_t; + +#define lowmem_page_address(dma_addr) ((dma_addr).va) + +struct mlx4_dev; + +void *alloc_cont_mem( + IN struct pci_dev *pdev, + IN unsigned long size, + OUT dma_addr_t*p_dma_addr); + +void free_cont_mem( + IN struct pci_dev *pdev, + IN dma_addr_t*p_dma_addr); + +// TODO: translate to DMA space - for now is not done anything +static inline dma_addr_t pci_map_page(struct pci_dev *pdev, + dma_addr_t dma_addr, unsigned long offset, SIZE_T size, int direction) +{ + UNUSED_PARAM(pdev); + UNUSED_PARAM(offset); + UNUSED_PARAM(size); + UNUSED_PARAM(direction); + + return dma_addr; +} + +static inline dma_addr_t +alloc_pages( struct pci_dev *pdev, gfp_t gfp, int order ) +{ + dma_addr_t dma_addr; + UNUSED_PARAM(gfp); + alloc_cont_mem( pdev, PAGE_SIZE << order, &dma_addr ); + return dma_addr; +} + +#define alloc_page(pdev, mask) alloc_pages(pdev, (mask), 0) +#define __get_free_page(mask) kzalloc(PAGE_SIZE, mask) + +static inline void +__free_pages( struct pci_dev *pdev, dma_addr_t dma_addr, int order ) +{ + UNUSED_PARAM(order); + ASSERT((PAGE_SIZE << order) == (int)dma_addr.sz); + free_cont_mem( pdev, &dma_addr ); +} + +#define __free_page(pdev, dma_addr) __free_pages(pdev, (dma_addr), 0) + + + +static inline int pci_dma_mapping_error(dma_addr_t dma_addr) +{ + return !dma_addr.sz; +} + +static inline void pci_unmap_page(struct pci_dev *pdev, + dma_addr_t dma_addr, SIZE_T size, int direction) +{ + UNUSED_PARAM(pdev); + UNUSED_PARAM(dma_addr); + UNUSED_PARAM(size); + UNUSED_PARAM(direction); +} + +static inline void +dma_sync_single( struct mlx4_dev **dev, dma_addr_t dma_addr, + size_t size, int direction) +{ + UNUSED_PARAM(dev); + UNUSED_PARAM(dma_addr); + UNUSED_PARAM(size); + UNUSED_PARAM(direction); + // TODO: here is to be used FlushAdapterBuffers() +} + +void * +dma_alloc_coherent( struct mlx4_dev **dev, size_t size, + dma_addr_t *p_dma, gfp_t gfp ); + +void dma_free_coherent( struct mlx4_dev **dev, size_t size, + void *vaddr, dma_addr_t dma_handle); + + void pci_free_consistent( struct pci_dev *pdev, size_t size, + void *vaddr, dma_addr_t dma_handle); + + + +//////////////////////////////////////////////////////// +// +// SG lists +// +//////////////////////////////////////////////////////// + +#define sg_dma_addr(sg) ((sg)->dma_addr) +#define sg_dma_address(sg) ((sg)->dma_addr.da) +#define sg_dma_len(sg) ((sg)->dma_addr.sz) +#define sg_dma_address_inc(p_dma,val) (p_dma)->da += val +#define sg_page(sg) ((sg)->dma_addr) + +struct scatterlist { + dma_addr_t dma_addr; /* logical (device) address */ + unsigned int offset; /* offset in the first page */ + PMDL p_mdl; /* MDL, if any (used for user 
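
/*
 * A sketch of how the dma_addr_t wrapper above is used (illustration only:
 * demo_dma_page() and its pdev argument are hypothetical).  Unlike on
 * Linux, dma_addr_t is a small struct carrying the device address, the
 * kernel virtual address and the size together, so one value is enough to
 * allocate, access and free a DMA buffer.
 */
static inline void demo_dma_page(struct pci_dev *pdev)
{
	dma_addr_t page = alloc_page(pdev, GFP_KERNEL);	/* order-0 allocation */

	if (pci_dma_mapping_error(page))		/* .sz == 0 means failure */
		return;

	RtlZeroMemory(lowmem_page_address(page), PAGE_SIZE);
	/* page.da is what gets handed to the HCA; page.va is for the CPU */

	__free_page(pdev, page);
}
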
space buffers) */ +}; + +#define offset_in_page(va) ((ULONG)((ULONG_PTR)(va) & ~PAGE_MASK)) + +static inline void sg_init_table(struct scatterlist *sgl, unsigned int nents) +{ + memset(sgl, 0, sizeof(*sgl) * nents); +} + +static inline void sg_set_buf(struct scatterlist *sg, const void *buf, + unsigned int buflen) +{ + UNUSED_PARAM(buflen); + ASSERT(sg->dma_addr.sz == buflen); + sg->offset = offset_in_page(buf); +} + +static inline void sg_set_page(struct scatterlist *sg, + dma_addr_t dma_addr, unsigned int len, unsigned int offset) +{ + UNUSED_PARAM(len); + sg->offset = offset; + sg->dma_addr = dma_addr; +} + +/* Returns: the number of unmapped sg elements */ +static inline int pci_map_sg(struct pci_dev *pdev, + struct scatterlist *sg, int nents, int direction) +{ + UNUSED_PARAM(pdev); + UNUSED_PARAM(sg); + UNUSED_PARAM(direction); + return nents; +} + +/* Returns: the number of unmapped sg elements */ +static inline int pci_unmap_sg(struct pci_dev *pdev, + struct scatterlist *sg, int nents, int direction) +{ + UNUSED_PARAM(pdev); + UNUSED_PARAM(sg); + UNUSED_PARAM(direction); + return nents; +} + diff --git a/branches/ConnectX/hw/mlx4/kernel/l2w_pci.h b/branches/ConnectX/hw/mlx4/kernel/l2w_pci.h new file mode 100644 index 00000000..f47fe0e4 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/l2w_pci.h @@ -0,0 +1,108 @@ +#pragma once + +// =========================================== +// LITERALS +// =========================================== + +#define DEVID_HERMON_SDR 0x6340 /* 25408 */ +#define DEVID_HERMON_DDR 0x634a /* 25418 */ +#define DEVID_HERMON_QDR 0x6354 /* 25428 */ +#define DEVID_HERMON_DDR_G2 0x6732 /* 26418 */ +#define DEVID_HERMON_QDR_G2 0x673c /* 26428 */ +/* livefish */ +#define DEVID_HERMON_BD 0x0191 /* 401 */ + +/* Types of supported HCA */ +typedef enum __hca_type { + HERMON, /* fully functional HCA */ + LIVEFISH /* a burning device */ +} hca_type_t; + +/* vendors */ +#define PCI_VENDOR_ID_MELLANOX 0x15b3 +#define PCI_VENDOR_ID_TOPSPIN 0x1867 + +#define HCA(v, d, t) \ + { PCI_VENDOR_ID_##v, DEVID_HERMON_##d, t } + +static struct pci_device_id { + USHORT vendor; + USHORT device; + hca_type_t driver_data; +}; + + +// =========================================== +// TYPES +// =========================================== + + +// =========================================== +// MACROS/FUNCTIONS +// =========================================== + +NTSTATUS pci_hca_reset( struct pci_dev *pdev); + +/* use shim to implement that */ +#define mlx4_reset(dev) pci_hca_reset(dev->pdev) + +// get bar boundaries +#define pci_resource_start(dev,bar_num) ((dev)->bar[bar_num >> 1].phys) +#define pci_resource_len(dev,bar_num) ((dev)->bar[bar_num >> 1].size) + +// i/o to registers + +static inline u64 readq(const volatile void __iomem *addr) +{ + //TODO: write atomic implementation of _IO_READ_QWORD and change mthca_doorbell.h + u64 val; + READ_REGISTER_BUFFER_ULONG((PULONG)(addr), (PULONG)&val, 2 ); + return val; +} + +static inline u32 readl(const volatile void __iomem *addr) +{ + return READ_REGISTER_ULONG((PULONG)(addr)); +} + +static inline u16 reads(const volatile void __iomem *addr) +{ + return READ_REGISTER_USHORT((PUSHORT)(addr)); +} + +static inline u8 readb(const volatile void __iomem *addr) +{ + return READ_REGISTER_UCHAR((PUCHAR)(addr)); +} + +#define __raw_readq readq +#define __raw_readl readl +#define __raw_reads reads +#define __raw_readb readb + +static inline void writeq(unsigned __int64 val, volatile void __iomem *addr) +{ + //TODO: write atomic implementation of 
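
/*
 * Scatterlist sketch (illustration only: demo_sg_usage() and its arguments
 * are hypothetical).  In this shim an sg entry simply wraps a dma_addr_t,
 * and pci_map_sg()/pci_unmap_sg() just return nents, because the
 * translation to a device address already happened at allocation time.
 */
static inline void demo_sg_usage(struct pci_dev *pdev, dma_addr_t page)
{
	struct scatterlist sg[1];
	int nents;

	sg_init_table(sg, 1);				/* zero all entries */
	sg_set_page(&sg[0], page, PAGE_SIZE, 0);	/* length is taken from page.sz */

	nents = pci_map_sg(pdev, sg, 1, PCI_DMA_BIDIRECTIONAL);
	ASSERT(nents == 1);

	DbgPrint("demo sg: da %#I64x, len %#lx\n",
		sg_dma_address(&sg[0]), sg_dma_len(&sg[0]));

	pci_unmap_sg(pdev, sg, nents, PCI_DMA_BIDIRECTIONAL);
}
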
_IO_WRITE_QWORD and change mthca_doorbell.h + WRITE_REGISTER_BUFFER_ULONG( (PULONG)(addr), (PULONG)&val, 2 ); +} + +static inline void writel(unsigned int val, volatile void __iomem *addr) +{ + WRITE_REGISTER_ULONG((PULONG)(addr),val); +} + +static inline void writes(unsigned short val, volatile void __iomem *addr) +{ + WRITE_REGISTER_USHORT((PUSHORT)(addr),val); +} + +static inline void writeb(unsigned char val, volatile void __iomem *addr) +{ + WRITE_REGISTER_UCHAR((PUCHAR)(addr),val); +} + +#define __raw_writeq writeq +#define __raw_writel writel +#define __raw_writes writes +#define __raw_writeb writeb + diff --git a/branches/ConnectX/hw/mlx4/kernel/l2w_pcipool.h b/branches/ConnectX/hw/mlx4/kernel/l2w_pcipool.h new file mode 100644 index 00000000..fd17b8fb --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/l2w_pcipool.h @@ -0,0 +1,102 @@ +#pragma once + +typedef struct pci_pool { + size_t size; + struct mlx4_dev * mdev; + char name [32]; + NPAGED_LOOKASIDE_LIST pool_hdr; +} pci_pool_t; + +// taken from dmapool.c + +/** +* pci_pool_create - Creates a pool of consistent memory blocks, for dma. +* @name: name of pool, for diagnostics +* @mdev: device that will be doing the DMA +* @size: size of the blocks in this pool. +* @align: alignment requirement for blocks; must be a power of two +* @allocation: returned blocks won't cross this boundary (or zero) +* Context: !in_interrupt() +* +* Returns a dma allocation pool with the requested characteristics, or +* null if one can't be created. Given one of these pools, dma_pool_alloc() +* may be used to allocate memory. Such memory will all have "consistent" +* DMA mappings, accessible by the device and its driver without using +* cache flushing primitives. The actual size of blocks allocated may be +* larger than requested because of alignment. +* +* If allocation is nonzero, objects returned from dma_pool_alloc() won't + * cross that size boundary. This is useful for devices which have + * addressing restrictions on individual DMA transfers, such as not crossing + * boundaries of 4KBytes. + */ + +pci_pool_t * +pci_pool_create (const char *name, struct pci_dev *pdev, + size_t size, size_t align, size_t allocation); + +/** + * dma_pool_alloc - get a block of consistent memory + * @pool: dma pool that will produce the block + * @mem_flags: GFP_* bitmask + * @handle: pointer to dma address of block + * + * This returns the kernel virtual address of a currently unused block, + * and reports its dma address through the handle. + * If such a memory block can't be allocated, null is returned. + */ +static inline void * +pci_pool_alloc (pci_pool_t *pool, int mem_flags, dma_addr_t *handle) +{ + PHYSICAL_ADDRESS pa; + void * ptr; + UNREFERENCED_PARAMETER(mem_flags); + + ASSERT( KeGetCurrentIrql() <= DISPATCH_LEVEL ); + + ptr = ExAllocateFromNPagedLookasideList( &pool->pool_hdr ); + if (ptr != NULL) { + pa = MmGetPhysicalAddress( ptr ); + // TODO: convert physical adress to dma one + handle->da = pa.QuadPart; + handle->va = ptr; + handle->sz = 0; /* not known here */ + } + return ptr; +} + + +/** +* dma_pool_free - put block back into dma pool +* @pool: the dma pool holding the block +* @vaddr: virtual address of block +* @dma: dma address of block +* +* Caller promises neither device nor driver will again touch this block +* unless it is first re-allocated. 
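
/*
 * Register-access sketch (illustration only: demo_hcr_busy() is
 * hypothetical; the 0x18 offset and bit 23 mirror HCR_STATUS_OFFSET and
 * HCR_GO_BIT from cmd.c further below).  The readl()/writel() wrappers
 * above map straight onto READ_REGISTER_ULONG/WRITE_REGISTER_ULONG, and
 * callers still do the big-endian byte swapping themselves.
 */
static inline int demo_hcr_busy(u8 __iomem *hcr)
{
	u32 status = readl(hcr + 0x18);		/* HCR status dword */

	/* the register is big-endian, hence the swab32() on the bit mask */
	return (status & swab32(1 << 23)) != 0;
}
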
+*/ +static inline void +pci_pool_free (pci_pool_t *pool, void *vaddr, dma_addr_t dma) +{ + UNREFERENCED_PARAMETER(dma); + ASSERT( KeGetCurrentIrql() <= DISPATCH_LEVEL ); + ExFreeToNPagedLookasideList( &pool->pool_hdr, vaddr ); +} + + + +/** + * pci_pool_destroy - destroys a pool of dma memory blocks. + * @pool: dma pool that will be destroyed + * Context: !in_interrupt() + * + * Caller guarantees that no more memory from the pool is in use, + * and that nothing will try to use the pool after this call. + */ +static inline void +pci_pool_destroy (pci_pool_t *pool) +{ + ExDeleteNPagedLookasideList( &pool->pool_hdr ); + ExFreePool( pool); +} + diff --git a/branches/ConnectX/hw/mlx4/kernel/l2w_radix.h b/branches/ConnectX/hw/mlx4/kernel/l2w_radix.h new file mode 100644 index 00000000..b12c2d78 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/l2w_radix.h @@ -0,0 +1,25 @@ +#pragma once + +#include + +struct radix_tree_root { + cl_map_t map; +}; + +int radix_tree_insert(struct radix_tree_root *root, + unsigned long index, void *item); + +void *radix_tree_lookup(struct radix_tree_root *root, + unsigned long index); + +void *radix_tree_delete(struct radix_tree_root *root, + unsigned long index); + + +cl_status_t radix_tree_create(struct radix_tree_root *root, + gfp_t gfp_mask); + +void radix_tree_destroy(struct radix_tree_root *root ); + +#define INIT_RADIX_TREE(root, mask) radix_tree_create(root, mask) + diff --git a/branches/ConnectX/hw/mlx4/kernel/l2w_spinlock.h b/branches/ConnectX/hw/mlx4/kernel/l2w_spinlock.h new file mode 100644 index 00000000..9e8cb07a --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/l2w_spinlock.h @@ -0,0 +1,148 @@ +#pragma once + +#include + +#if 1 + +typedef cl_spinlock_t spinlock_t; + +static inline void spin_lock_init( + IN spinlock_t* const p_spinlock ) +{ + cl_spinlock_init( p_spinlock ); +} + +#define spin_lock cl_spinlock_acquire +#define spin_unlock cl_spinlock_release + +CL_INLINE void +spin_lock_dpc( + IN cl_spinlock_t* const p_spinlock ) +{ + ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL); + KeAcquireSpinLockAtDpcLevel( &p_spinlock->lock ); +} + +CL_INLINE void +spin_unlock_dpc( + IN cl_spinlock_t* const p_spinlock ) +{ + ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL); + KeReleaseSpinLockFromDpcLevel( &p_spinlock->lock ); +} + +#else +typedef struct spinlock { + KSPIN_LOCK lock; + KLOCK_QUEUE_HANDLE lockh; + KIRQL irql; +} spinlock_t; + + +static inline void spin_lock_init( + IN spinlock_t* const p_spinlock ) +{ + KeInitializeSpinLock( &p_spinlock->lock ); +} + +static inline void +spin_lock( + IN spinlock_t* const l) +{ + KIRQL irql = KeGetCurrentIrql(); + + ASSERT( l && irql <= DISPATCH_LEVEL ); + + if (irql == DISPATCH_LEVEL) + KeAcquireInStackQueuedSpinLockAtDpcLevel( &l->lock, &l->lockh ); + else + KeAcquireInStackQueuedSpinLock( &l->lock, &l->lockh ); + l->irql = irql; +} + +static inline void +spin_unlock( + IN spinlock_t* const l) +{ + ASSERT( l && KeGetCurrentIrql() == DISPATCH_LEVEL ); + if (l->irql == DISPATCH_LEVEL) + KeReleaseInStackQueuedSpinLockFromDpcLevel( &l->lockh ); + else + KeReleaseInStackQueuedSpinLock( &l->lockh ); +} + +/* to be used only at DPC level */ +static inline void +spin_lock_dpc( + IN spinlock_t* const l) +{ + ASSERT( l && KeGetCurrentIrql() == DISPATCH_LEVEL ); + KeAcquireInStackQueuedSpinLockAtDpcLevel( &l->lock, &l->lockh ); +} + +/* to be used only at DPC level */ +static inline void +spin_unlock_dpc( + IN spinlock_t* const l) +{ + ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL); + 
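
/*
 * Lookaside-pool sketch (illustration only: demo_pool_usage() and its pdev
 * argument are hypothetical).  This mirrors how cmd.c below manages its
 * command mailboxes: one pool of fixed-size, aligned, DMA-able buffers.
 */
static inline void demo_pool_usage(struct pci_dev *pdev)
{
	pci_pool_t *pool;
	dma_addr_t dma;
	void *buf;

	pool = pci_pool_create("demo_pool", pdev, 256, 256, 0);
	if (!pool)
		return;

	buf = pci_pool_alloc(pool, GFP_KERNEL, &dma);	/* fills dma.va and dma.da */
	if (buf)
		pci_pool_free(pool, buf, dma);

	pci_pool_destroy(pool);
}
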
KeReleaseInStackQueuedSpinLockFromDpcLevel( &l->lockh ); +} + +static inline void +spin_lock_sync( + IN spinlock_t* const l ) +{ + KLOCK_QUEUE_HANDLE lockh; + ASSERT( l && KeGetCurrentIrql() <= DISPATCH_LEVEL ); + KeAcquireInStackQueuedSpinLock ( &l->lock, &lockh ); + KeReleaseInStackQueuedSpinLock( &lockh ); +} + +#endif + +#define DEFINE_SPINLOCK(lock) spinlock_t lock + +static inline void +spin_lock_irqsave( + IN spinlock_t* const l, + IN unsigned long * flags) +{ + UNUSED_PARAM(flags); + spin_lock(l); +} + +static inline void +spin_unlock_irqrestore( + IN spinlock_t* const l, + IN unsigned long flags) +{ + UNUSED_PARAM(flags); + spin_unlock(l); +} + +static inline void +spin_lock_sync( + IN spinlock_t* const l ) +{ + KLOCK_QUEUE_HANDLE lockh; + ASSERT( l && KeGetCurrentIrql() <= DISPATCH_LEVEL ); + KeAcquireInStackQueuedSpinLock ( &l->lock, &lockh ); + KeReleaseInStackQueuedSpinLock( &lockh ); +} + +/* we are working from DPC level, so we can use usual spinlocks */ +#define spin_lock_irq spin_lock +#define spin_unlock_irq spin_unlock +#define spin_lock_nested(a,b) spin_lock(a) + +/* Windows doesn't support such kind of spinlocks so far, but may be tomorrow ... */ +#define rwlock_init spin_lock_init +#define read_lock_irqsave spin_lock_irqsave +#define read_unlock_irqrestore spin_unlock_irqrestore +#define write_lock_irq spin_lock_irq +#define write_unlock_irq spin_unlock_irq + +// rw_lock +typedef spinlock_t rwlock_t; + diff --git a/branches/ConnectX/hw/mlx4/kernel/l2w_sync.h b/branches/ConnectX/hw/mlx4/kernel/l2w_sync.h new file mode 100644 index 00000000..48dd7ebe --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/l2w_sync.h @@ -0,0 +1,164 @@ +#pragma once + +// literals +#ifndef LONG_MAX +#define LONG_MAX 2147483647L /* maximum (signed) long value */ +#endif + + +// +// mutex wrapper +// + +struct mutex +{ + KMUTEX m; +}; + +#define DEFINE_MUTEX(a) struct mutex a + +static inline void mutex_init( struct mutex * mutex ) +{ + KeInitializeMutex( &mutex->m, 0 ); +} + +static inline void mutex_lock( struct mutex * mutex ) +{ + NTSTATUS status; + int need_to_wait = 1; + + ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL); + while (need_to_wait) { + status = KeWaitForSingleObject( &mutex->m, Executive, KernelMode, FALSE, NULL ); + if (status == STATUS_SUCCESS) + break; + } +} + +static inline void mutex_unlock( struct mutex * mutex ) +{ + ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL); + KeReleaseMutex( &mutex->m, FALSE ); +} + + +// +// semaphore wrapper +// + +struct semaphore +{ + KSEMAPHORE s; +}; + +static inline void sema_init( + IN struct semaphore *sem, + IN LONG cnt) +{ + ASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL); + KeInitializeSemaphore( &sem->s, cnt, cnt ); +} + +static inline void up( struct semaphore *sem ) +{ + ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL); + KeReleaseSemaphore( &sem->s, 0, 1, FALSE ); +} +static inline void down( struct semaphore *sem ) +{ + NTSTATUS status; + int need_to_wait = 1; + + ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL); + while (need_to_wait) { + status = KeWaitForSingleObject( &sem->s, Executive, KernelMode, FALSE, NULL ); + if (status == STATUS_SUCCESS) + break; + } +} + + +// +// completion wrapper +// + +struct completion +{ + KEVENT event; + int done; +}; + +static inline void init_completion( struct completion * compl ) +{ + //TODO: ASSERT is temporary outcommented, because using of fast mutexes in CompLib + // cause working on APC_LEVEL + //ASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL); + KeInitializeEvent( &compl->event, NotificationEvent , FALSE 
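
/*
 * Locking sketch (illustration only: struct demo_ctx and demo_locking()
 * are hypothetical).  The flags argument of spin_lock_irqsave() is ignored
 * in this port; all spin_lock variants end up in the same spinlock
 * acquire, and the _irq/_irqsave names exist only so the Linux code
 * compiles unchanged.
 */
struct demo_ctx {
	spinlock_t	lock;
	struct mutex	mtx;
	int		counter;
};

static inline void demo_locking(struct demo_ctx *ctx)
{
	unsigned long flags;

	spin_lock_init(&ctx->lock);
	mutex_init(&ctx->mtx);

	spin_lock_irqsave(&ctx->lock, &flags);	/* note: takes a pointer here */
	ctx->counter++;
	spin_unlock_irqrestore(&ctx->lock, flags);

	mutex_lock(&ctx->mtx);			/* IRQL below DISPATCH_LEVEL only */
	ctx->counter--;
	mutex_unlock(&ctx->mtx);
}
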
); + compl->done = 0; +} + +static inline int wait_for_completion_timeout( struct completion * compl, unsigned long timeout ) +{ + LARGE_INTEGER interval; + interval.QuadPart = (-10)* (__int64)timeout; + return (int)KeWaitForSingleObject( &compl->event, Executive, KernelMode, FALSE, &interval ); +} + +static inline void wait_for_completion( struct completion * compl ) +{ + NTSTATUS status; + int need_to_wait = 1; + + ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL); + + while (need_to_wait) { + status = KeWaitForSingleObject( &compl->event, Executive, KernelMode, FALSE, NULL ); + if (status == STATUS_SUCCESS) + break; + } +} + + + +static inline void complete( struct completion * compl ) +{ + ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL); + compl->done++; + KeSetEvent( &compl->event, 0, FALSE ); +} + +#ifdef USE_WDM_INTERRUPTS + +// +// IRQ wrapper +// + +static inline void free_irq(PKINTERRUPT int_obj) +{ + IoDisconnectInterrupt( int_obj ); +} + +int request_irq( + IN struct mlx4_dev * dev, + IN ULONG vector, /* interrupt or MSI-X vector */ + IN PKSERVICE_ROUTINE isr, /* ISR */ + IN PVOID isr_ctx, /* ISR context */ + IN dpc_t dpc, + IN PVOID dpc_ctx, /* ISR context */ + OUT PKINTERRUPT * int_obj /* interrupt object */ + ); + +#endif + +// +// various +// + +// TODO: Is it enough to wait at DPC level ? +// Maybe we need to use here KeSynchronizeExecution ? +static inline void synchronize_irq(unsigned int irq) +{ + UNUSED_PARAM(irq); + KeFlushQueuedDpcs(); +} + diff --git a/branches/ConnectX/hw/mlx4/kernel/l2w_time.h b/branches/ConnectX/hw/mlx4/kernel/l2w_time.h new file mode 100644 index 00000000..672b9547 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/l2w_time.h @@ -0,0 +1,17 @@ +#pragma once + +// returns current time in usecs (u64) +#define jiffies cl_get_time_stamp() + +// jiffies is measured in usecs here! +#define msecs_to_jiffies(msecs) ((msecs)*1000) + +#define time_after(a,b) ((long)(b) - (long)(a) < 0) +#define time_before(a,b) time_after(b,a) + +#define time_after_eq(a,b) ((long)(a) - (long)(b) >= 0) +#define time_before_eq(a,b) time_after_eq(b,a) + +extern LARGE_INTEGER g_cmd_interval; +#define cond_resched() KeDelayExecutionThread( KernelMode, FALSE, &g_cmd_interval ) + diff --git a/branches/ConnectX/hw/mlx4/kernel/l2w_umem.h b/branches/ConnectX/hw/mlx4/kernel/l2w_umem.h new file mode 100644 index 00000000..e367eb4b --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/l2w_umem.h @@ -0,0 +1,34 @@ +#pragma once + +#include "l2w_memory.h" +#include "iobuf.h" + +struct ib_umem { + struct ib_ucontext *p_uctx; + int page_size; + iobuf_t iobuf; + int iobuf_used; + void * secure_handle; +}; + + +void ib_umem_release(struct ib_umem *p_ib_umem); + +struct ib_umem *ib_umem_get(struct ib_ucontext *context, u64 addr, + size_t size, enum ib_access_flags access); + +int ib_umem_page_count(struct ib_umem *p_ib_umem); + +dma_addr_t ib_umem_get_dma(struct ib_umem *p_ib_umem); + +int ib_umem_map( + IN u64 va, + IN u64 size, + IN ib_access_t acc, + OUT PMDL *mdl, + OUT void **kva); + +void ib_umem_unmap( + IN PMDL p_mdl, + IN void *kva); + diff --git a/branches/ConnectX/hw/mlx4/kernel/mlx4_debug.h b/branches/ConnectX/hw/mlx4/kernel/mlx4_debug.h new file mode 100644 index 00000000..b467416c --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/mlx4_debug.h @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. 
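
/*
 * Timeout-loop sketch (illustration only: demo_poll_with_timeout() and its
 * demo_condition callback are hypothetical).  jiffies is a microsecond
 * timestamp in this port, so msecs_to_jiffies() is a plain multiply by
 * 1000; the pattern below is the one mlx4_cmd_poll() in cmd.c uses to
 * bound how long it spins on the HCR GO bit.
 */
static inline int demo_poll_with_timeout(int (*demo_condition)(void),
					 unsigned long timeout_msecs)
{
	u64 end = jiffies + msecs_to_jiffies(timeout_msecs);

	while (!demo_condition() && time_before(jiffies, end))
		cond_resched();		/* KeDelayExecutionThread under the hood */

	return demo_condition() ? 0 : -ETIMEDOUT;
}
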
+ * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: mlx4_debug.h 1936 2007-02-06 16:04:33Z sleybo $ + */ + + +#ifndef _MLX4_DEBUG_H_ +#define _MLX4_DEBUG_H_ + +#include "core\ev_log.h" + +extern uint32_t g_mlx4_dbg_level; +extern uint32_t g_mlx4_dbg_flags; +#define MAX_LOG_BUF_LEN 512 +extern WCHAR g_wlog_buf[ MAX_LOG_BUF_LEN ]; +extern UCHAR g_slog_buf[ MAX_LOG_BUF_LEN ]; + +static void _build_str( const char * format, ... ) +{ + NTSTATUS status; + va_list p_arg; + va_start(p_arg, format); +// vsprintf((char *)g_slog_buf, format , p_arg); +// swprintf(g_wlog_buf, L"%S", g_slog_buf); + status = RtlStringCbVPrintfA((char *)g_slog_buf, sizeof(g_slog_buf), format , p_arg); + if (status) + goto end; + status = RtlStringCchPrintfW(g_wlog_buf, sizeof(g_wlog_buf), L"%S", g_slog_buf); + if (status) + goto end; +// vsnprintf_s((char *)g_slog_buf, sizeof(g_slog_buf), _TRUNCATE, format , p_arg); +// swprintf_s(g_wlog_buf, sizeof(g_wlog_buf), L"%S", g_slog_buf); +end: + va_end(p_arg); +} + +#define MLX4_PRINT_TO_EVENT_LOG(_obj_,_level_,_flag_,_msg_) \ + { \ + NTSTATUS event_id; \ + switch (_level_) { \ + case TRACE_LEVEL_FATAL: case TRACE_LEVEL_ERROR: event_id = EVENT_MLX4_ANY_ERROR; break; \ + case TRACE_LEVEL_WARNING: event_id = EVENT_MLX4_ANY_WARN; break; \ + default: event_id = EVENT_MLX4_ANY_INFO; break; \ + } \ + _build_str _msg_; \ + WriteEventLogEntryStr( _obj_, (ULONG)event_id, 0, 0, g_wlog_buf, 0, 0 ); \ + } + +#define MLX4_PRINT_EV_MDEV(_level_,_flag_,_msg_) \ + MLX4_PRINT_TO_EVENT_LOG(mdev->pdev->p_self_do,_level_,_flag_,_msg_) + + +#if defined(EVENT_TRACING) +// +// Software Tracing Definitions +// + +#define WPP_CONTROL_GUIDS \ + WPP_DEFINE_CONTROL_GUID(Mlx4BusCtlGuid,(E51BB6E2,914A,4e21,93C0,192F4801BBFF), \ + WPP_DEFINE_BIT( MLX4_DBG_DEV) \ + WPP_DEFINE_BIT( MLX4_DBG_PNP) \ + WPP_DEFINE_BIT( MLX4_DBG_INIT) \ + WPP_DEFINE_BIT( MLX4_DBG_MAD) \ + WPP_DEFINE_BIT( MLX4_DBG_PO) \ + WPP_DEFINE_BIT( MLX4_DBG_PD)\ + WPP_DEFINE_BIT( MLX4_DBG_CQ) \ + WPP_DEFINE_BIT( MLX4_DBG_QP) \ + WPP_DEFINE_BIT( MLX4_DBG_MEMORY) \ + WPP_DEFINE_BIT( MLX4_DBG_AV) \ + WPP_DEFINE_BIT( MLX4_DBG_SRQ) \ + WPP_DEFINE_BIT( MLX4_DBG_MCAST) \ + WPP_DEFINE_BIT( MLX4_DBG_LOW) \ + WPP_DEFINE_BIT( MLX4_DBG_SHIM)) \ + WPP_DEFINE_BIT( MLX4_DBG_DRV)) + + +#define WPP_GLOBALLOGGER + + +#define WPP_LEVEL_FLAGS_ENABLED(lvl, flags) (WPP_LEVEL_ENABLED(flags) && WPP_CONTROL(WPP_BIT_ ## flags).Level >= lvl) +#define WPP_LEVEL_FLAGS_LOGGER(lvl,flags) WPP_LEVEL_LOGGER(flags) +#define 
WPP_FLAG_ENABLED(flags)(WPP_LEVEL_ENABLED(flags) && WPP_CONTROL(WPP_BIT_ ## flags).Level >= TRACE_LEVEL_VERBOSE) +#define WPP_FLAG_LOGGER(flags) WPP_LEVEL_LOGGER(flags) + + +// begin_wpp config +// MLX4_ENTER(FLAG); +// MLX4_EXIT(FLAG); +// USEPREFIX(MLX4_PRINT, "%!STDPREFIX! [MTMLX4] :%!FUNC!() :"); +// USESUFFIX(MLX4_ENTER, " [MTMLX4] :%!FUNC!()["); +// USESUFFIX(MLX4_EXIT, " [MTMLX4] :%!FUNC!()]"); +// end_wpp + + + +#define MLX4_PRINT_EV(_level_,_flag_,_msg_) \ + { \ + MLX4_PRINT_EV_MDEV(_level_,_flag_,_msg_) \ + } + + +#else + + +#include + +/* + * Debug macros + */ + + +#define MLX4_DBG_DEV (1 << 0) +#define MLX4_DBG_PNP (1<<1) +#define MLX4_DBG_INIT (1 << 2) +#define MLX4_DBG_MAD (1 << 3) +#define MLX4_DBG_PO (1 << 4) +#define MLX4_DBG_PD (1<<5) +#define MLX4_DBG_QP (1 << 6) +#define MLX4_DBG_CQ (1 << 7) +#define MLX4_DBG_MEMORY (1 << 8) +#define MLX4_DBG_AV (1<<9) +#define MLX4_DBG_SRQ (1 << 10) +#define MLX4_DBG_MCAST (1<<11) +#define MLX4_DBG_LOW (1 << 12) +#define MLX4_DBG_SHIM (1 << 13) +#define MLX4_DBG_DRV (1 << 14) + + +#if DBG + +// assignment of _level_ is need to to overcome warning C4127 +#define MLX4_PRINT(_level_,_flag_,_msg_) \ + { \ + int __lvl = _level_; \ + if (g_mlx4_dbg_level >= (_level_) && \ + (g_mlx4_dbg_flags & (_flag_))) { \ + DbgPrint ("~%d:[MLX4_BUS] %s() :", KeGetCurrentProcessorNumber(), __FUNCTION__); \ + if(__lvl == TRACE_LEVEL_ERROR) DbgPrint ("***ERROR*** "); \ + DbgPrint _msg_; \ + } \ + } + +#else + +#define MLX4_PRINT(lvl ,flags, msg) + +#endif + +#define MLX4_PRINT_EV(_level_,_flag_,_msg_) \ + { \ + MLX4_PRINT(_level_,_flag_,_msg_) \ + MLX4_PRINT_EV_MDEV(_level_,_flag_,_msg_) \ + } + +#define MLX4_ENTER(flags)\ + MLX4_PRINT(TRACE_LEVEL_VERBOSE, flags,("[\n")); + +#define MLX4_EXIT(flags)\ + MLX4_PRINT(TRACE_LEVEL_VERBOSE, flags, ("]\n" )); + + +#endif //EVENT_TRACING + + +#endif /*_MLX4_DEBUG_H_ */ + + diff --git a/branches/ConnectX/hw/mlx4/kernel/net/Makefile.lnx b/branches/ConnectX/hw/mlx4/kernel/net/Makefile.lnx new file mode 100644 index 00000000..0952a652 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/net/Makefile.lnx @@ -0,0 +1,4 @@ +obj-$(CONFIG_MLX4_CORE) += mlx4_core.o + +mlx4_core-y := alloc.o catas.o cmd.o cq.o eq.o fw.o icm.o intf.o main.o mcg.o \ + mr.o pd.o profile.o qp.o reset.o srq.o diff --git a/branches/ConnectX/hw/mlx4/kernel/net/SOURCES b/branches/ConnectX/hw/mlx4/kernel/net/SOURCES new file mode 100644 index 00000000..6fa4bbee --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/net/SOURCES @@ -0,0 +1,53 @@ +TARGETNAME=mlx4_net +TARGETPATH=..\..\..\..\bin\kernel\obj$(BUILD_ALT_DIR) +TARGETTYPE=DRIVER_LIBRARY + + + +!if $(FREEBUILD) +#ENABLE_EVENT_TRACING=1 +!else +#ENABLE_EVENT_TRACING=1 +!endif + + +DLLDEF=net.def + +SOURCES= net.rc \ + alloc.c \ + catas.c \ + cmd.c \ + cq.c \ + eq.c \ + fw.c \ + icm.c \ + intf.c \ + main.c \ + mcg.c \ + mr.c \ + pd.c \ + profile.c \ + qp.c \ + srq.c \ + +INCLUDES=..;..\inc;..\..\..\..\inc;..\..\..\..\inc\kernel; + +C_DEFINES=$(C_DEFINES) -DDRIVER -DDEPRECATE_DDK_FUNCTIONS -D__LITTLE_ENDIAN -DUSE_WDM_INTERRUPTS +#-DFORCE_LIVEFISH + +TARGETLIBS= \ + $(DDK_LIB_PATH)\ntstrsafe.lib \ + $(TARGETPATH)\*\complib.lib \ + $(TARGETPATH)\*\mlx4_core.lib \ + +!IFDEF ENABLE_EVENT_TRACING + +C_DEFINES = $(C_DEFINES) -DEVENT_TRACING + +RUN_WPP = $(SOURCES) -km -ext: .c .h .C .H \ + -scan:..\mlx4_debug.h \ + -func:MLX4_PRINT(LEVEL,FLAGS,(MSG,...)) \ + -func:MLX4_PRINT_EXIT(LEVEL,FLAGS,(MSG,...)) +!ENDIF + +MSC_WARNING_LEVEL= /W4 diff --git a/branches/ConnectX/hw/mlx4/kernel/net/alloc.c 
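
/*
 * Debug-macro sketch (illustration only: demo_dbg() is hypothetical).
 * With WPP event tracing disabled, MLX4_PRINT() filters on the global
 * level/flag mask and prints through DbgPrint(); note that the message is
 * a parenthesized, printf-style argument list.  MLX4_PRINT_EV() also
 * writes to the system event log and expects a variable named 'mdev' to
 * be in scope.
 */
static void demo_dbg(struct mlx4_dev *mdev, int cqn)
{
	MLX4_ENTER(MLX4_DBG_CQ);

	MLX4_PRINT(TRACE_LEVEL_INFORMATION, MLX4_DBG_CQ,
		("demo: polling cqn %#x\n", cqn));

	MLX4_PRINT_EV(TRACE_LEVEL_ERROR, MLX4_DBG_CQ,
		("demo: error on cqn %#x\n", cqn));

	MLX4_EXIT(MLX4_DBG_CQ);
}
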
b/branches/ConnectX/hw/mlx4/kernel/net/alloc.c new file mode 100644 index 00000000..b4889055 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/net/alloc.c @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "mlx4.h" + +u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap) +{ + u32 obj; + + spin_lock(&bitmap->lock); + + obj = find_next_zero_bit(bitmap->table, bitmap->max, bitmap->last); + if (obj >= bitmap->max) { + bitmap->top = (bitmap->top + bitmap->max) & bitmap->mask; + obj = find_first_zero_bit(bitmap->table, bitmap->max); + } + + if (obj < bitmap->max) { + set_bit(obj, bitmap->table); + bitmap->last = (obj + 1) & (bitmap->max - 1); + obj |= bitmap->top; + } else + obj = (u32)-1; + + spin_unlock(&bitmap->lock); + + return obj; +} + +void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj) +{ + obj &= bitmap->max - 1; + + spin_lock(&bitmap->lock); + clear_bit(obj, bitmap->table); + bitmap->last = min(bitmap->last, obj); + bitmap->top = (bitmap->top + bitmap->max) & bitmap->mask; + spin_unlock(&bitmap->lock); +} + +int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, u32 reserved) +{ + int i; + + /* num must be a power of 2 */ + if (num != roundup_pow_of_two(num)) + return -EINVAL; + + bitmap->last = 0; + bitmap->top = 0; + bitmap->max = num; + bitmap->mask = mask; + spin_lock_init(&bitmap->lock); + bitmap->table = kzalloc(BITS_TO_LONGS(num) * sizeof (long), GFP_KERNEL); + if (!bitmap->table) + return -ENOMEM; + + for (i = 0; i < (int)reserved; ++i) + set_bit(i, bitmap->table); + + return 0; +} + +void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap) +{ + kfree(bitmap->table); +} + +/* + * Handling for queue buffers -- we allocate a bunch of memory and + * register it in a memory region at HCA virtual address 0. If the + * requested size is > max_direct, we split the allocation into + * multiple pages, so we don't require too much contiguous memory. 
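
/*
 * Resource-bitmap sketch (illustration only: demo_bitmap_usage() is
 * hypothetical).  This allocator hands out QP/CQ/SRQ numbers: 'num' must
 * be a power of two, 'mask' is normally num - 1, the low 'reserved'
 * entries start out taken, and a failed allocation returns (u32)-1.
 */
static int demo_bitmap_usage(void)
{
	struct mlx4_bitmap bitmap;
	u32 obj;

	if (mlx4_bitmap_init(&bitmap, 64, 63, 2))	/* 64 entries, 2 reserved */
		return -ENOMEM;

	obj = mlx4_bitmap_alloc(&bitmap);		/* first free entry: 2 */
	if (obj != (u32)-1)
		mlx4_bitmap_free(&bitmap, obj);

	mlx4_bitmap_cleanup(&bitmap);
	return 0;
}
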
+ */ + +int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, + struct mlx4_buf *buf) +{ + dma_addr_t t; + + if (size <= max_direct) { + buf->nbufs = 1; + buf->npages = 1; + // TODO: we don't use pages less then PAGE_SIZE + size = max(size, PAGE_SIZE); + buf->page_shift = get_order(size) + PAGE_SHIFT; + buf->u.direct.buf = dma_alloc_coherent(&dev->pdev->dev, + size, &t, GFP_KERNEL); + if (!buf->u.direct.buf) + return -ENOMEM; + + buf->u.direct.map = t; + + while (t.da & ((1 << buf->page_shift) - 1)) { + --buf->page_shift; + buf->npages *= 2; + } + MLX4_PRINT( TRACE_LEVEL_INFORMATION, MLX4_DBG_CQ, + ("size %#x, nbufs %d, pages %d, page_shift %d, kva %p, da %llx, buf_size %#x\n", + size, buf->nbufs, buf->npages, buf->page_shift, + buf->u.direct.buf, t.da, t.sz )); + memset(buf->u.direct.buf, 0, size); + } else { + int i; + + buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE; + buf->npages = buf->nbufs; + buf->page_shift = PAGE_SHIFT; + buf->u.page_list = kzalloc(buf->nbufs * sizeof *buf->u.page_list, + GFP_KERNEL); + if (!buf->u.page_list) + return -ENOMEM; + + for (i = 0; i < buf->nbufs; ++i) { + buf->u.page_list[i].buf = + dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE, + &t, GFP_KERNEL); + if (!buf->u.page_list[i].buf) + goto err_free; + + buf->u.page_list[i].map = t; + + memset(buf->u.page_list[i].buf, 0, PAGE_SIZE); + } + } + + return 0; + +err_free: + mlx4_buf_free(dev, size, buf); + + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(mlx4_buf_alloc); + +void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf) +{ + int i; + + // TODO: we don't use pages less then PAGE_SIZE + size = max(size, PAGE_SIZE); + + if (buf->nbufs == 1) + dma_free_coherent(&dev->pdev->dev, size, buf->u.direct.buf, + buf->u.direct.map); + else { + for (i = 0; i < buf->nbufs; ++i) + if (buf->u.page_list[i].buf) + dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + buf->u.page_list[i].buf, + buf->u.page_list[i].map); + kfree(buf->u.page_list); + } +} +EXPORT_SYMBOL_GPL(mlx4_buf_free); diff --git a/branches/ConnectX/hw/mlx4/kernel/net/catas.c b/branches/ConnectX/hw/mlx4/kernel/net/catas.c new file mode 100644 index 00000000..68e4ef43 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/net/catas.c @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
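
/*
 * Queue-buffer sketch (illustration only: demo_buf_usage() is
 * hypothetical).  Requests up to max_direct bytes get one physically
 * contiguous coherent block; anything larger falls back to an array of
 * PAGE_SIZE chunks that the HCA later reaches through its MTT entries.
 */
static int demo_buf_usage(struct mlx4_dev *dev)
{
	struct mlx4_buf buf;
	int size = 16 * 1024;			/* e.g. a small CQE ring */

	if (mlx4_buf_alloc(dev, size, 2 * PAGE_SIZE, &buf))
		return -ENOMEM;

	/* size > max_direct, so buf.nbufs > 1 and the data is in buf.u.page_list[] */
	mlx4_buf_free(dev, size, &buf);
	return 0;
}
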
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "mlx4.h" + +enum { + MLX4_CATAS_POLL_INTERVAL = 5 * HZ, +}; + +static DEFINE_SPINLOCK(catas_lock); +static LIST_HEAD(catas_list); + +// TODO: put into Globals +// "Reset device on internal errors if non-zero (default 1)") +int g_internal_err_reset = 1; + +static void dump_err_buf(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + u32 i; + + mlx4_err(dev, "Internal error detected:\n"); + for (i = 0; i < priv->fw.catas_size; ++i) + mlx4_err(dev, " buf[%02x]: %08x\n", + i, swab32(readl(priv->catas_err.map + i))); +} + +static void catas_reset() +{ + struct mlx4_priv *priv, *tmppriv; + struct mlx4_dev *dev; + struct list_head tlist; + int ret; + + tlist.next = tlist.prev = &tlist; + spin_lock_irq(&catas_lock); + list_splice_init(&catas_list, &tlist); + spin_unlock_irq(&catas_lock); + + list_for_each_entry_safe(priv, tmppriv, &tlist, catas_err.list, struct mlx4_priv, struct mlx4_priv) { + ret = mlx4_restart_one(priv->dev.pdev); + dev = &priv->dev; + if (ret) + mlx4_err(dev, "Reset failed (%d)\n", ret); + else + mlx4_dbg(dev, "Reset succeeded\n"); + } +} + +static void +catas_reset_wi( + IN DEVICE_OBJECT* p_dev_obj, + IN void* context ) +{ + UNUSED_PARAM(p_dev_obj); + IoFreeWorkItem( context ); + catas_reset(); +} + +/* polling on DISPATCH_LEVEL */ +static void poll_catas(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + if (readl(priv->catas_err.map)) { + dump_err_buf(dev); + + mlx4_dispatch_event(dev, MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR, 0, 0); + + if (g_internal_err_reset) { + PIO_WORKITEM catas_work = IoAllocateWorkItem( dev->pdev->p_self_do ); + + spin_lock_dpc(&catas_lock); + list_add(&priv->catas_err.list, &catas_list); + spin_unlock_dpc(&catas_lock); + + if (!catas_work) + IoQueueWorkItem( catas_work, catas_reset_wi, DelayedWorkQueue, catas_work ); + } + } else { + spin_lock_dpc(&catas_lock); + if (!priv->catas_err.stop) { + KeSetTimerEx( &priv->catas_err.timer, priv->catas_err.interval, + 0, &priv->catas_err.timer_dpc ); + } + spin_unlock_dpc(&catas_lock); + } +} + +static void timer_dpc( + IN struct _KDPC *Dpc, + IN PVOID DeferredContext, + IN PVOID SystemArgument1, + IN PVOID SystemArgument2 + ) +{ + struct mlx4_dev *dev = (struct mlx4_dev *)DeferredContext; + UNREFERENCED_PARAMETER(Dpc); + UNREFERENCED_PARAMETER(SystemArgument1); + UNREFERENCED_PARAMETER(SystemArgument2); + poll_catas( dev ); +} + +void mlx4_start_catas_poll(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + u64 addr; + + INIT_LIST_HEAD(&priv->catas_err.list); + priv->catas_err.map = NULL; + + addr = pci_resource_start(dev->pdev, priv->fw.catas_bar) + + priv->fw.catas_offset; + + priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4); + if (!priv->catas_err.map) { + mlx4_warn(dev, "Failed to map internal error buffer at 0x%lx\n", + addr); + return; + } + + priv->catas_err.stop = 0; + spin_lock_init( &catas_lock ); + KeInitializeDpc( &priv->catas_err.timer_dpc, timer_dpc, dev ); + KeInitializeTimer( &priv->catas_err.timer ); + priv->catas_err.interval.QuadPart = (-10)* (__int64)MLX4_CATAS_POLL_INTERVAL; + KeSetTimerEx( &priv->catas_err.timer, priv->catas_err.interval, + 0, &priv->catas_err.timer_dpc ); +} + +void mlx4_stop_catas_poll(struct mlx4_dev *dev) +{ + struct 
mlx4_priv *priv = mlx4_priv(dev); + + spin_lock_irq(&catas_lock); + priv->catas_err.stop = 1; + spin_unlock_irq(&catas_lock); + + KeCancelTimer(&priv->catas_err.timer); + KeFlushQueuedDpcs(); + + if (priv->catas_err.map) + iounmap(priv->catas_err.map, priv->fw.catas_size * 4); + + spin_lock_irq(&catas_lock); + list_del(&priv->catas_err.list); + spin_unlock_irq(&catas_lock); +} + + diff --git a/branches/ConnectX/hw/mlx4/kernel/net/cmd.c b/branches/ConnectX/hw/mlx4/kernel/net/cmd.c new file mode 100644 index 00000000..ef71595d --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/net/cmd.c @@ -0,0 +1,517 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "mlx4.h" +#include "cmd.h" + +#define CMD_POLL_TOKEN 0xffff + +enum { + /* command completed successfully: */ + CMD_STAT_OK = 0x00, + /* Internal error (such as a bus error) occurred while processing command: */ + CMD_STAT_INTERNAL_ERR = 0x01, + /* Operation/command not supported or opcode modifier not supported: */ + CMD_STAT_BAD_OP = 0x02, + /* Parameter not supported or parameter out of range: */ + CMD_STAT_BAD_PARAM = 0x03, + /* System not enabled or bad system state: */ + CMD_STAT_BAD_SYS_STATE = 0x04, + /* Attempt to access reserved or unallocaterd resource: */ + CMD_STAT_BAD_RESOURCE = 0x05, + /* Requested resource is currently executing a command, or is otherwise busy: */ + CMD_STAT_RESOURCE_BUSY = 0x06, + /* Required capability exceeds device limits: */ + CMD_STAT_EXCEED_LIM = 0x08, + /* Resource is not in the appropriate state or ownership: */ + CMD_STAT_BAD_RES_STATE = 0x09, + /* Index out of range: */ + CMD_STAT_BAD_INDEX = 0x0a, + /* FW image corrupted: */ + CMD_STAT_BAD_NVMEM = 0x0b, + /* Attempt to modify a QP/EE which is not in the presumed state: */ + CMD_STAT_BAD_QP_STATE = 0x10, + /* Bad segment parameters (Address/Size): */ + CMD_STAT_BAD_SEG_PARAM = 0x20, + /* Memory Region has Memory Windows bound to: */ + CMD_STAT_REG_BOUND = 0x21, + /* HCA local attached memory not present: */ + CMD_STAT_LAM_NOT_PRE = 0x22, + /* Bad management packet (silently discarded): */ + CMD_STAT_BAD_PKT = 0x30, + /* More outstanding CQEs in CQ than new CQ size: */ + CMD_STAT_BAD_SIZE = 0x40, + /* must be the last and have max value */ + CMD_STAT_SIZE = CMD_STAT_BAD_SIZE + 1 +}; + +enum { + HCR_IN_PARAM_OFFSET = 0x00, + HCR_IN_MODIFIER_OFFSET = 0x08, + HCR_OUT_PARAM_OFFSET = 0x0c, + HCR_TOKEN_OFFSET = 0x14, + HCR_STATUS_OFFSET = 0x18, + + HCR_OPMOD_SHIFT = 12, + HCR_T_BIT = 21, + HCR_E_BIT = 22, + HCR_GO_BIT = 23 +}; + +enum { + GO_BIT_TIMEOUT_MSECS = 10000 +}; + +struct mlx4_cmd_context { + struct completion done; + int result; + int next; + u64 out_param; + u16 token; +}; + +static int mlx4_status_to_errno(u8 status) { + static int trans_table[CMD_STAT_SIZE]; + static int filled = 0; + + if ( !filled ) { + memset( (char*)trans_table, 0, sizeof(trans_table) ); + trans_table[CMD_STAT_INTERNAL_ERR] = -EIO; + trans_table[CMD_STAT_BAD_OP] = -EPERM; + trans_table[CMD_STAT_BAD_PARAM] = -EINVAL; + trans_table[CMD_STAT_BAD_SYS_STATE] = -ENXIO; + trans_table[CMD_STAT_BAD_RESOURCE] = -EBADF; + trans_table[CMD_STAT_RESOURCE_BUSY] = -EBUSY; + trans_table[CMD_STAT_EXCEED_LIM] = -ENOMEM; + trans_table[CMD_STAT_BAD_RES_STATE] = -EBADF; + trans_table[CMD_STAT_BAD_INDEX] = -EBADF; + trans_table[CMD_STAT_BAD_NVMEM] = -EFAULT; + trans_table[CMD_STAT_BAD_QP_STATE] = -EINVAL; + trans_table[CMD_STAT_BAD_SEG_PARAM] = -EFAULT; + trans_table[CMD_STAT_REG_BOUND] = -EBUSY; + trans_table[CMD_STAT_LAM_NOT_PRE] = -EAGAIN; + trans_table[CMD_STAT_BAD_PKT] = -EINVAL; + trans_table[CMD_STAT_BAD_SIZE] = -ENOMEM; + filled = 1; + } + + if (status >= ARRAY_SIZE(trans_table) || + (status != CMD_STAT_OK && trans_table[status] == 0)) + return -EIO; + + return trans_table[status]; +} + +static int cmd_pending(struct mlx4_dev *dev) +{ + u32 status = readl(mlx4_priv(dev)->cmd.hcr + HCR_STATUS_OFFSET); + + return (status & swab32(1 << HCR_GO_BIT)) || + (mlx4_priv(dev)->cmd.toggle == + !!(status & swab32(1 << HCR_T_BIT))); +} + +static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param, + u32 in_modifier, u8 op_modifier, u16 op, u16 token, + int event) +{ + struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd; + 
u32 __iomem *hcr = (u32 __iomem *)cmd->hcr; + int ret = -EAGAIN; + u64 end; + + mutex_lock(&cmd->hcr_mutex); + + end = jiffies; + if (event) + end += msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS); + + while (cmd_pending(dev)) { + if (time_after_eq(jiffies, end)) + goto out; + cond_resched(); + } + + /* + * We use writel (instead of something like memcpy_toio) + * because writes of less than 32 bits to the HCR don't work + * (and some architectures such as ia64 implement memcpy_toio + * in terms of writeb). + */ + __raw_writel((__force u32) cpu_to_be32(in_param >> 32), hcr + 0); + __raw_writel((__force u32) cpu_to_be32(in_param & 0xfffffffful), hcr + 1); + __raw_writel((__force u32) cpu_to_be32(in_modifier), hcr + 2); + __raw_writel((__force u32) cpu_to_be32(out_param >> 32), hcr + 3); + __raw_writel((__force u32) cpu_to_be32(out_param & 0xfffffffful), hcr + 4); + __raw_writel((__force u32) cpu_to_be32(token << 16), hcr + 5); + + /* __raw_writel may not order writes. */ + wmb(); + + __raw_writel((__force u32) cpu_to_be32((1 << HCR_GO_BIT) | + (cmd->toggle << HCR_T_BIT) | + (event ? (1 << HCR_E_BIT) : 0) | + (op_modifier << HCR_OPMOD_SHIFT) | + op), hcr + 6); + + /* + * Make sure that our HCR writes don't get mixed in with + * writes from another CPU starting a FW command. + */ + mmiowb(); + + cmd->toggle = cmd->toggle ^ 1; + + ret = 0; + +out: + mutex_unlock(&cmd->hcr_mutex); + return ret; +} + +static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, + int out_is_imm, u32 in_modifier, u8 op_modifier, + u16 op, unsigned long timeout) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + u8 __iomem *hcr = priv->cmd.hcr; + int err = 0; + u64 end; + + down(&priv->cmd.poll_sem); + + err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0, + in_modifier, op_modifier, op, CMD_POLL_TOKEN, 0); + if (err) + goto out; + + end = msecs_to_jiffies(timeout) + jiffies; + while (cmd_pending(dev) && time_before(jiffies, end)) + cond_resched(); + + if (cmd_pending(dev)) { + err = -ETIMEDOUT; + goto out; + } + + if (out_is_imm) + *out_param = + (u64) be32_to_cpu((__force __be32) + __raw_readl(hcr + HCR_OUT_PARAM_OFFSET)) << 32 | + (u64) be32_to_cpu((__force __be32) + __raw_readl(hcr + HCR_OUT_PARAM_OFFSET + 4)); + + err = mlx4_status_to_errno((u8)(be32_to_cpu((__force __be32) + __raw_readl(hcr + HCR_STATUS_OFFSET)) >> 24)); + +out: + up(&priv->cmd.poll_sem); + return err; +} + +void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_cmd_context *context = + &priv->cmd.context[token & priv->cmd.token_mask]; + + /* previously timed out command completing at long last */ + if (token != context->token) + return; + + context->result = mlx4_status_to_errno(status); + context->out_param = out_param; + + complete(&context->done); +} + +static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, + int out_is_imm, u32 in_modifier, u8 op_modifier, + u16 op, unsigned long timeout) +{ + struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd; + struct mlx4_cmd_context *context; + int err = 0; + + down(&cmd->event_sem); + + spin_lock(&cmd->context_lock); + BUG_ON(cmd->free_head < 0); + context = &cmd->context[cmd->free_head]; + context->token += cmd->token_mask + 1; + cmd->free_head = context->next; + spin_unlock(&cmd->context_lock); + + init_completion(&context->done); + + mlx4_cmd_post(dev, in_param, out_param ? 
*out_param : 0, + in_modifier, op_modifier, op, context->token, 1); + + if (wait_for_completion_timeout(&context->done, msecs_to_jiffies(timeout))) { + if (!context->done.done) { + err = -EBUSY; + goto out; + } + } + + err = context->result; + if (err) + goto out; + + if (out_is_imm) + *out_param = context->out_param; + +out: + spin_lock(&cmd->context_lock); + context->next = cmd->free_head; + cmd->free_head = (int)(context - cmd->context); + spin_unlock(&cmd->context_lock); + + up(&cmd->event_sem); + return err; +} + +static char *__print_opcode(int opcode) +{ + char *str = NULL; + switch (opcode) { + case MLX4_CMD_SYS_EN : str = "MLX4_CMD_SYS_EN "; break; + case MLX4_CMD_SYS_DIS: str = "MLX4_CMD_SYS_DIS"; break; + case MLX4_CMD_MAP_FA : str = "MLX4_CMD_MAP_FA "; break; + case MLX4_CMD_UNMAP_FA: str = "MLX4_CMD_UNMAP_FA"; break; + case MLX4_CMD_RUN_FW : str = "MLX4_CMD_RUN_FW "; break; + case MLX4_CMD_MOD_STAT_CFG: str = "MLX4_CMD_MOD_STAT_CFG"; break; + case MLX4_CMD_QUERY_DEV_CAP: str = "MLX4_CMD_QUERY_DEV_CAP"; break; + case MLX4_CMD_QUERY_FW: str = "MLX4_CMD_QUERY_FW"; break; + case MLX4_CMD_ENABLE_LAM: str = "MLX4_CMD_ENABLE_LAM"; break; + case MLX4_CMD_DISABLE_LAM: str = "MLX4_CMD_DISABLE_LAM"; break; + case MLX4_CMD_QUERY_DDR: str = "MLX4_CMD_QUERY_DDR"; break; + case MLX4_CMD_QUERY_ADAPTER: str = "MLX4_CMD_QUERY_ADAPTER"; break; + case MLX4_CMD_INIT_HCA: str = "MLX4_CMD_INIT_HCA"; break; + case MLX4_CMD_CLOSE_HCA: str = "MLX4_CMD_CLOSE_HCA"; break; + case MLX4_CMD_INIT_PORT: str = "MLX4_CMD_INIT_PORT"; break; + case MLX4_CMD_CLOSE_PORT: str = "MLX4_CMD_CLOSE_PORT"; break; + case MLX4_CMD_QUERY_HCA: str = "MLX4_CMD_QUERY_HCA"; break; + case MLX4_CMD_QUERY_PORT: str = "MLX4_CMD_QUERY_PORT"; break; + case MLX4_CMD_SET_PORT: str = "MLX4_CMD_SET_PORT"; break; + case MLX4_CMD_ACCESS_DDR: str = "MLX4_CMD_ACCESS_DDR"; break; + case MLX4_CMD_MAP_ICM: str = "MLX4_CMD_MAP_ICM"; break; + case MLX4_CMD_UNMAP_ICM: str = "MLX4_CMD_UNMAP_ICM"; break; + case MLX4_CMD_MAP_ICM_AUX: str = "MLX4_CMD_MAP_ICM_AUX"; break; + case MLX4_CMD_UNMAP_ICM_AUX: str = "MLX4_CMD_UNMAP_ICM_AUX"; break; + case MLX4_CMD_SET_ICM_SIZE: str = "MLX4_CMD_SET_ICM_SIZE"; break; + case MLX4_CMD_SW2HW_MPT: str = "MLX4_CMD_SW2HW_MPT"; break; + case MLX4_CMD_QUERY_MPT: str = "MLX4_CMD_QUERY_MPT"; break; + case MLX4_CMD_HW2SW_MPT: str = "MLX4_CMD_HW2SW_MPT"; break; + case MLX4_CMD_READ_MTT: str = "MLX4_CMD_READ_MTT"; break; + case MLX4_CMD_WRITE_MTT: str = "MLX4_CMD_WRITE_MTT"; break; + case MLX4_CMD_SYNC_TPT: str = "MLX4_CMD_SYNC_TPT"; break; + case MLX4_CMD_MAP_EQ : str = "MLX4_CMD_MAP_EQ "; break; + case MLX4_CMD_SW2HW_EQ: str = "MLX4_CMD_SW2HW_EQ"; break; + case MLX4_CMD_HW2SW_EQ: str = "MLX4_CMD_HW2SW_EQ"; break; + case MLX4_CMD_QUERY_EQ: str = "MLX4_CMD_QUERY_EQ"; break; + case MLX4_CMD_SW2HW_CQ: str = "MLX4_CMD_SW2HW_CQ"; break; + case MLX4_CMD_HW2SW_CQ: str = "MLX4_CMD_HW2SW_CQ"; break; + case MLX4_CMD_QUERY_CQ: str = "MLX4_CMD_QUERY_CQ"; break; + case MLX4_CMD_MODIFY_CQ: str = "MLX4_CMD_MODIFY_CQ"; break; + case MLX4_CMD_SW2HW_SRQ: str = "MLX4_CMD_SW2HW_SRQ"; break; + case MLX4_CMD_HW2SW_SRQ: str = "MLX4_CMD_HW2SW_SRQ"; break; + case MLX4_CMD_QUERY_SRQ: str = "MLX4_CMD_QUERY_SRQ"; break; + case MLX4_CMD_ARM_SRQ: str = "MLX4_CMD_ARM_SRQ"; break; + case MLX4_CMD_RST2INIT_QP: str = "MLX4_CMD_RST2INIT_QP"; break; + case MLX4_CMD_INIT2RTR_QP: str = "MLX4_CMD_INIT2RTR_QP"; break; + case MLX4_CMD_RTR2RTS_QP: str = "MLX4_CMD_RTR2RTS_QP"; break; + case MLX4_CMD_RTS2RTS_QP: str = "MLX4_CMD_RTS2RTS_QP"; break; + case 
MLX4_CMD_SQERR2RTS_QP: str = "MLX4_CMD_SQERR2RTS_QP"; break; + case MLX4_CMD_2ERR_QP: str = "MLX4_CMD_2ERR_QP"; break; + case MLX4_CMD_RTS2SQD_QP: str = "MLX4_CMD_RTS2SQD_QP"; break; + case MLX4_CMD_SQD2SQD_QP: str = "MLX4_CMD_SQD2SQD_QP"; break; + case MLX4_CMD_SQD2RTS_QP: str = "MLX4_CMD_SQD2RTS_QP"; break; + case MLX4_CMD_2RST_QP: str = "MLX4_CMD_2RST_QP"; break; + case MLX4_CMD_QUERY_QP: str = "MLX4_CMD_QUERY_QP"; break; + case MLX4_CMD_INIT2INIT_QP: str = "MLX4_CMD_INIT2INIT_QP"; break; + case MLX4_CMD_SUSPEND_QP: str = "MLX4_CMD_SUSPEND_QP"; break; + case MLX4_CMD_UNSUSPEND_QP: str = "MLX4_CMD_UNSUSPEND_QP"; break; + case MLX4_CMD_CONF_SPECIAL_QP: str = "MLX4_CMD_CONF_SPECIAL_QP"; break; + case MLX4_CMD_MAD_IFC: str = "MLX4_CMD_MAD_IFC"; break; + case MLX4_CMD_READ_MCG: str = "MLX4_CMD_READ_MCG"; break; + case MLX4_CMD_WRITE_MCG: str = "MLX4_CMD_WRITE_MCG"; break; + case MLX4_CMD_MGID_HASH: str = "MLX4_CMD_MGID_HASH"; break; + case MLX4_CMD_DIAG_RPRT: str = "MLX4_CMD_DIAG_RPRT"; break; + case MLX4_CMD_NOP : str = "MLX4_CMD_NOP "; break; + case MLX4_CMD_QUERY_DEBUG_MSG: str = "MLX4_CMD_QUERY_DEBUG_MSG"; break; + case MLX4_CMD_SET_DEBUG_MSG: str = "MLX4_CMD_SET_DEBUG_MSG"; break; + } + return str; +} + +int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, + int out_is_imm, u32 in_modifier, u8 op_modifier, + u16 op, unsigned long timeout) +{ +#if 0 + mlx4_err(dev, "op %s, ev %d, in_param %#I64x, in_param %#I64x, out_is_imm %d, in_modifier %#x, op_modifier %d\n", + __print_opcode(op), mlx4_priv(dev)->cmd.use_events, in_param, out_param, + out_is_imm, in_modifier, (int)op_modifier); +#endif + + if (mlx4_priv(dev)->cmd.use_events) + return mlx4_cmd_wait(dev, in_param, out_param, out_is_imm, + in_modifier, op_modifier, op, timeout); + else + return mlx4_cmd_poll(dev, in_param, out_param, out_is_imm, + in_modifier, op_modifier, op, timeout); +} +EXPORT_SYMBOL_GPL(__mlx4_cmd); + +int mlx4_cmd_init(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + mutex_init(&priv->cmd.hcr_mutex); + sema_init(&priv->cmd.poll_sem, 1); + priv->cmd.use_events = 0; + priv->cmd.toggle = 1; + + priv->cmd.hcr = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_HCR_BASE, + MLX4_HCR_SIZE); + if (!priv->cmd.hcr) { + mlx4_err(dev, "Couldn't map command register."); + return -ENOMEM; + } + + priv->cmd.pool = pci_pool_create("mlx4_cmd", dev->pdev, + MLX4_MAILBOX_SIZE, + MLX4_MAILBOX_SIZE, 0); + if (!priv->cmd.pool) { + iounmap(priv->cmd.hcr, MLX4_HCR_SIZE); + return -ENOMEM; + } + + return 0; +} + +void mlx4_cmd_cleanup(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + pci_pool_destroy(priv->cmd.pool); + iounmap(priv->cmd.hcr, MLX4_HCR_SIZE); +} + +/* + * Switch to using events to issue FW commands (can only be called + * after event queue for command events has been initialized). 
+ */ +int mlx4_cmd_use_events(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int i; + + priv->cmd.context = kmalloc(priv->cmd.max_cmds * + sizeof (struct mlx4_cmd_context), + GFP_KERNEL); + if (!priv->cmd.context) + return -ENOMEM; + + for (i = 0; i < priv->cmd.max_cmds; ++i) { + priv->cmd.context[i].token = (u16)i; + priv->cmd.context[i].next = i + 1; + } + + priv->cmd.context[priv->cmd.max_cmds - 1].next = -1; + priv->cmd.free_head = 0; + + sema_init(&priv->cmd.event_sem, priv->cmd.max_cmds); + spin_lock_init(&priv->cmd.context_lock); + + for (priv->cmd.token_mask = 1; + priv->cmd.token_mask < priv->cmd.max_cmds; + priv->cmd.token_mask <<= 1) + ; /* nothing */ + --priv->cmd.token_mask; + + priv->cmd.use_events = 1; + + down(&priv->cmd.poll_sem); + + return 0; +} + +/* + * Switch back to polling (used when shutting down the device) + */ +void mlx4_cmd_use_polling(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int i; + + priv->cmd.use_events = 0; + + for (i = 0; i < priv->cmd.max_cmds; ++i) + down(&priv->cmd.event_sem); + + kfree(priv->cmd.context); + + up(&priv->cmd.poll_sem); +} + +struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev) +{ + struct mlx4_cmd_mailbox *mailbox; + + mailbox = kmalloc(sizeof *mailbox, GFP_KERNEL); + if (!mailbox) + return ERR_PTR(-ENOMEM); + + mailbox->buf = pci_pool_alloc(mlx4_priv(dev)->cmd.pool, GFP_KERNEL, + &mailbox->dma); + if (!mailbox->buf) { + kfree(mailbox); + return ERR_PTR(-ENOMEM); + } + + return mailbox; +} +EXPORT_SYMBOL_GPL(mlx4_alloc_cmd_mailbox); + +void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox) +{ + if (!mailbox) + return; + + pci_pool_free(mlx4_priv(dev)->cmd.pool, mailbox->buf, mailbox->dma); + kfree(mailbox); +} +EXPORT_SYMBOL_GPL(mlx4_free_cmd_mailbox); diff --git a/branches/ConnectX/hw/mlx4/kernel/net/cq.c b/branches/ConnectX/hw/mlx4/kernel/net/cq.c new file mode 100644 index 00000000..ddc9abbe --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/net/cq.c @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
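
/*
 * Command-mailbox sketch (illustration only: demo_query_fw() is
 * hypothetical; mlx4_cmd_box() is the cmd.h helper used by the CQ code
 * below for commands that return data through a mailbox).  Mailboxes come
 * from the pci_pool set up in mlx4_cmd_init(), and their DMA address is
 * passed to firmware as the command's out_param.
 */
static int demo_query_fw(struct mlx4_dev *dev)
{
	struct mlx4_cmd_mailbox *mailbox;
	int err;

	mailbox = mlx4_alloc_cmd_mailbox(dev);
	if (IS_ERR(mailbox))
		return PTR_ERR(mailbox);

	err = mlx4_cmd_box(dev, 0, mailbox->dma.da, 0, 0,
			MLX4_CMD_QUERY_FW, MLX4_CMD_TIME_CLASS_A);
	/* on success the firmware reply is now in mailbox->buf */

	mlx4_free_cmd_mailbox(dev, mailbox);
	return err;
}
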
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "mlx4.h" +#include "cmd.h" +#include "icm.h" +#include "cq.h" + +#define MLX4_CQ_STATUS_OK ( 0 << 28) +#define MLX4_CQ_STATUS_OVERFLOW ( 9 << 28) +#define MLX4_CQ_STATUS_WRITE_FAIL (10 << 28) +#define MLX4_CQ_FLAG_CC ( 1 << 18) +#define MLX4_CQ_FLAG_OI ( 1 << 17) +#define MLX4_CQ_STATE_ARMED ( 9 << 8) +#define MLX4_CQ_STATE_ARMED_SOL ( 6 << 8) +#define MLX4_EQ_STATE_FIRED (10 << 8) + +void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn) +{ + struct mlx4_cq *cq; + + cq = radix_tree_lookup(&mlx4_priv(dev)->cq_table.tree, + cqn & (dev->caps.num_cqs - 1)); + if (!cq) { + mlx4_warn(dev, "Completion event for bogus CQ %08x\n", cqn); + return; + } + + if (cq->p_u_arm_sn) + ++*cq->p_u_arm_sn; + else + ++cq->arm_sn; + + cq->comp(cq); +} + +void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type) +{ + struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table; + struct mlx4_cq *cq; + + spin_lock_dpc(&cq_table->lock); + + cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs - 1)); + if (cq) + atomic_inc(&cq->refcount); + + spin_unlock_dpc(&cq_table->lock); + + if (!cq) { + mlx4_warn(dev, "Async event for bogus CQ %08x\n", cqn); + return; + } + + cq->event(cq, event_type); + + if (atomic_dec_and_test(&cq->refcount)) + complete(&cq->free); +} + +static int mlx4_SW2HW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, + int cq_num) +{ + return mlx4_cmd(dev, mailbox->dma.da, cq_num, 0, MLX4_CMD_SW2HW_CQ, + MLX4_CMD_TIME_CLASS_A); +} + +static int mlx4_MODIFY_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, + int cq_num, u32 opmod) +{ + return mlx4_cmd(dev, mailbox->dma.da, cq_num, (u8)opmod, MLX4_CMD_MODIFY_CQ, + MLX4_CMD_TIME_CLASS_A); +} + +static int mlx4_HW2SW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, + int cq_num) +{ + return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma.da : 0, cq_num, + mailbox ? 
0 : 1, MLX4_CMD_HW2SW_CQ, + MLX4_CMD_TIME_CLASS_A); +} + +int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, + struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_cq_table *cq_table = &priv->cq_table; + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_cq_context *cq_context; + u64 mtt_addr; + int err; + + cq->cqn = mlx4_bitmap_alloc(&cq_table->bitmap); + if (cq->cqn == -1) + return -ENOMEM; + + err = mlx4_table_get(dev, &cq_table->table, cq->cqn); + if (err) + goto err_out; + + err = mlx4_table_get(dev, &cq_table->cmpt_table, cq->cqn); + if (err) + goto err_put; + + spin_lock_irq(&cq_table->lock); + err = radix_tree_insert(&cq_table->tree, cq->cqn, cq); + spin_unlock_irq(&cq_table->lock); + if (err) + goto err_cmpt_put; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) { + err = PTR_ERR(mailbox); + goto err_radix; + } + + cq_context = (struct mlx4_cq_context *)mailbox->buf; + memset(cq_context, 0, sizeof *cq_context); + + cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index); + cq_context->comp_eqn = (u8)priv->eq_table.eq[MLX4_EQ_COMP].eqn; + cq_context->log_page_size = (u8)(mtt->page_shift - MLX4_ICM_PAGE_SHIFT); + + mtt_addr = mlx4_mtt_addr(dev, mtt); + cq_context->mtt_base_addr_h = (u8)(mtt_addr >> 32); + cq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff); + cq_context->db_rec_addr = cpu_to_be64(db_rec); + MLX4_PRINT( TRACE_LEVEL_INFORMATION, MLX4_DBG_CQ, + ("CQ: cqn %#x, nent %#x, mtt_base %#I64x, db_rec %#I64x, page_shift %d, log_page_size %#hx, uar_index %#x \n", + cq->cqn, nent, mtt_addr, db_rec, mtt->page_shift, (u16)cq_context->log_page_size, uar->index )); + + err = mlx4_SW2HW_CQ(dev, mailbox, cq->cqn); + mlx4_free_cmd_mailbox(dev, mailbox); + if (err) + goto err_radix; + + cq->cons_index = 0; + cq->arm_sn = 1; + cq->uar = uar; + atomic_set(&cq->refcount, 1); + init_completion(&cq->free); + + return 0; + +err_radix: + spin_lock_irq(&cq_table->lock); + radix_tree_delete(&cq_table->tree, cq->cqn); + spin_unlock_irq(&cq_table->lock); + +err_cmpt_put: + mlx4_table_put(dev, &cq_table->cmpt_table, cq->cqn); + +err_put: + mlx4_table_put(dev, &cq_table->table, cq->cqn); + +err_out: + mlx4_bitmap_free(&cq_table->bitmap, cq->cqn); + + return err; +} +EXPORT_SYMBOL_GPL(mlx4_cq_alloc); + +int mlx4_cq_modify(struct mlx4_dev *dev, struct mlx4_cq *cq, + struct mlx4_cq_context *context, int modify) +{ + struct mlx4_cmd_mailbox *mailbox; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + memcpy(mailbox->buf, context, sizeof *context); + err = mlx4_MODIFY_CQ(dev, mailbox, cq->cqn, modify); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_cq_modify); + +void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_cq_table *cq_table = &priv->cq_table; + int err; + + err = mlx4_HW2SW_CQ(dev, NULL, cq->cqn); + if (err) + mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn); + + synchronize_irq(priv->eq_table.eq[MLX4_EQ_COMP].irq); + + spin_lock_irq(&cq_table->lock); + radix_tree_delete(&cq_table->tree, cq->cqn); + spin_unlock_irq(&cq_table->lock); + + if (atomic_dec_and_test(&cq->refcount)) + complete(&cq->free); + wait_for_completion(&cq->free); + + mlx4_table_put(dev, &cq_table->table, cq->cqn); + mlx4_bitmap_free(&cq_table->bitmap, cq->cqn); +} +EXPORT_SYMBOL_GPL(mlx4_cq_free); + +int 
mlx4_init_cq_table(struct mlx4_dev *dev) +{ + struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table; + int err; + + spin_lock_init(&cq_table->lock); + INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC); + + err = mlx4_bitmap_init(&cq_table->bitmap, dev->caps.num_cqs, + dev->caps.num_cqs - 1, dev->caps.reserved_cqs); + if (err) + return err; + + return 0; +} + +void mlx4_cleanup_cq_table(struct mlx4_dev *dev) +{ + /* Nothing to do to clean up radix_tree */ + mlx4_bitmap_cleanup(&mlx4_priv(dev)->cq_table.bitmap); + radix_tree_destroy(&mlx4_priv(dev)->cq_table.tree); +} diff --git a/branches/ConnectX/hw/mlx4/kernel/net/eq.c b/branches/ConnectX/hw/mlx4/kernel/net/eq.c new file mode 100644 index 00000000..08240ceb --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/net/eq.c @@ -0,0 +1,723 @@ +/* + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "mlx4.h" +#include "cmd.h" +#include "fw.h" + +enum { + MLX4_NUM_ASYNC_EQE = 0x100, + MLX4_NUM_SPARE_EQE = 0x80, + MLX4_EQ_ENTRY_SIZE = 0x20 +}; + +/* + * Must be packed because start is 64 bits but only aligned to 32 bits. 
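+ * The layout below is the 64-byte EQ context exactly as the HCA reads it
+ * from the SW2HW_EQ mailbox, so the compiler must not insert any padding
+ * between the fields.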
+ */ +struct mlx4_eq_context { + __be32 flags; + u16 reserved1[3]; + __be16 page_offset; + u8 log_eq_size; + u8 reserved2[4]; + u8 eq_period; + u8 reserved3; + u8 eq_max_count; + u8 reserved4[3]; + u8 intr; + u8 log_page_size; + u8 reserved5[2]; + u8 mtt_base_addr_h; + __be32 mtt_base_addr_l; + u32 reserved6[2]; + __be32 consumer_index; + __be32 producer_index; + u32 reserved7[4]; +}; + +#define MLX4_EQ_STATUS_OK ( 0 << 28) +#define MLX4_EQ_STATUS_WRITE_FAIL (10 << 28) +#define MLX4_EQ_OWNER_SW ( 0 << 24) +#define MLX4_EQ_OWNER_HW ( 1 << 24) +#define MLX4_EQ_FLAG_EC ( 1 << 18) +#define MLX4_EQ_FLAG_OI ( 1 << 17) +#define MLX4_EQ_STATE_ARMED ( 9 << 8) +#define MLX4_EQ_STATE_FIRED (10 << 8) +#define MLX4_EQ_STATE_ALWAYS_ARMED (11 << 8) + +#define MLX4_ASYNC_EVENT_MASK ((1ull << MLX4_EVENT_TYPE_PATH_MIG) | \ + (1ull << MLX4_EVENT_TYPE_COMM_EST) | \ + (1ull << MLX4_EVENT_TYPE_SQ_DRAINED) | \ + (1ull << MLX4_EVENT_TYPE_CQ_ERROR) | \ + (1ull << MLX4_EVENT_TYPE_WQ_CATAS_ERROR) | \ + (1ull << MLX4_EVENT_TYPE_EEC_CATAS_ERROR) | \ + (1ull << MLX4_EVENT_TYPE_PATH_MIG_FAILED) | \ + (1ull << MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \ + (1ull << MLX4_EVENT_TYPE_WQ_ACCESS_ERROR) | \ + (1ull << MLX4_EVENT_TYPE_PORT_CHANGE) | \ + (1ull << MLX4_EVENT_TYPE_ECC_DETECT) | \ + (1ull << MLX4_EVENT_TYPE_SRQ_CATAS_ERROR) | \ + (1ull << MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE) | \ + (1ull << MLX4_EVENT_TYPE_SRQ_LIMIT) | \ + (1ull << MLX4_EVENT_TYPE_CMD)) + +#pragma pack(push,1) +struct mlx4_eqe { + u8 reserved1; + u8 type; + u8 reserved2; + u8 subtype; + union { + u32 raw[6]; + struct { + __be32 cqn; + } __attribute__((packed)) comp; + struct { + u16 reserved1; + __be16 token; + u32 reserved2; + u8 reserved3[3]; + u8 status; + __be64 out_param; + } __attribute__((packed)) cmd; + struct { + __be32 qpn; + } __attribute__((packed)) qp; + struct { + __be32 srqn; + } __attribute__((packed)) srq; + struct { + __be32 cqn; + u32 reserved1; + u8 reserved2[3]; + u8 syndrome; + } __attribute__((packed)) cq_err; + struct { + u32 reserved1[2]; + __be32 port; + } __attribute__((packed)) port_change; + } event; + u8 reserved3[3]; + u8 owner; +} __attribute__((packed)); +#pragma pack(pop) + +static void eq_set_ci(struct mlx4_eq *eq, int req_not) +{ + __raw_writel((__force u32) cpu_to_be32((eq->cons_index & 0xffffff) | + req_not << 31), + eq->doorbell); + /* We still want ordering, just not swabbing, so add a barrier */ + mb(); +} + +static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry) +{ + unsigned long off = (entry & (eq->nent - 1)) * MLX4_EQ_ENTRY_SIZE; + return (struct mlx4_eqe *)(eq->page_list[off / PAGE_SIZE].buf + off % PAGE_SIZE); +} + +static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq) +{ + struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index); + return !!(eqe->owner & 0x80) ^ !!(eq->cons_index & eq->nent) ? NULL : eqe; +} + +#pragma warning( disable : 4706) +static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) +{ + struct mlx4_eqe *eqe; + int cqn; + int eqes_found = 0; + int set_ci = 0; + + while ((eqe = next_eqe_sw(eq))) { + /* + * Make sure we read EQ entry contents after we've + * checked the ownership bit. 
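+ * (next_eqe_sw() only returns an entry whose owner bit agrees with the
+ * current pass of cons_index through the queue; the barrier keeps the
+ * rest of the EQE from being read before that ownership check is done.)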
+ */ + rmb(); + + switch (eqe->type) { + case MLX4_EVENT_TYPE_COMP: + cqn = be32_to_cpu(eqe->event.comp.cqn) & 0xffffff; + mlx4_cq_completion(dev, cqn); + break; + + case MLX4_EVENT_TYPE_PATH_MIG: + case MLX4_EVENT_TYPE_COMM_EST: + case MLX4_EVENT_TYPE_SQ_DRAINED: + case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE: + case MLX4_EVENT_TYPE_WQ_CATAS_ERROR: + case MLX4_EVENT_TYPE_PATH_MIG_FAILED: + case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR: + mlx4_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, + eqe->type); + break; + + case MLX4_EVENT_TYPE_SRQ_LIMIT: + case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR: + mlx4_srq_event(dev, be32_to_cpu(eqe->event.srq.srqn) & 0xffffff, + eqe->type); + break; + + case MLX4_EVENT_TYPE_CMD: + mlx4_cmd_event(dev, + be16_to_cpu(eqe->event.cmd.token), + eqe->event.cmd.status, + be64_to_cpu(eqe->event.cmd.out_param)); + break; + + case MLX4_EVENT_TYPE_PORT_CHANGE: + mlx4_dispatch_event(dev, eqe->type, eqe->subtype, + be32_to_cpu(eqe->event.port_change.port) >> 28); + break; + + case MLX4_EVENT_TYPE_CQ_ERROR: + mlx4_warn(dev, "CQ %s on CQN %06x\n", + eqe->event.cq_err.syndrome == 1 ? + "overrun" : "access violation", + be32_to_cpu(eqe->event.cq_err.cqn) & 0xffffff); + mlx4_cq_event(dev, be32_to_cpu(eqe->event.cq_err.cqn), + eqe->type); + break; + + case MLX4_EVENT_TYPE_EQ_OVERFLOW: + mlx4_warn(dev, "EQ overrun on EQN %d\n", eq->eqn); + break; + + case MLX4_EVENT_TYPE_EEC_CATAS_ERROR: + case MLX4_EVENT_TYPE_ECC_DETECT: + default: + mlx4_warn(dev, "Unhandled event %02x(%02x) on EQ %d at index %u\n", + eqe->type, eqe->subtype, eq->eqn, eq->cons_index); + break; + }; + + ++eq->cons_index; + eqes_found = 1; + ++set_ci; + + /* + * The HCA will think the queue has overflowed if we + * don't tell it we've been processing events. We + * create our EQs with MLX4_NUM_SPARE_EQE extra + * entries, so we must update our consumer index at + * least that often. + */ + if (unlikely(set_ci >= MLX4_NUM_SPARE_EQE)) { + /* + * Conditional on hca_type is OK here because + * this is a rare case, not the fast path. + */ + eq_set_ci(eq, 0); + set_ci = 0; + } + } + + eq_set_ci(eq, 1); + + return eqes_found; +} +#pragma warning(disable:4706) + +static void mlx4_dpc( PRKDPC dpc, + PVOID ctx, PVOID arg1, PVOID arg2 ) +{ + struct mlx4_eq *eq = ctx; + + UNREFERENCED_PARAMETER(dpc); + UNREFERENCED_PARAMETER(arg1); + UNREFERENCED_PARAMETER(arg2); + + spin_lock_dpc(&eq->lock); + mlx4_eq_int(eq->dev, eq); + spin_unlock_dpc(&eq->lock); +} + +static BOOLEAN mlx4_interrupt( + IN struct _KINTERRUPT *Interrupt, + IN PVOID ServiceContext + ) +{ + struct mlx4_dev *dev = ServiceContext; + struct mlx4_priv *priv = mlx4_priv(dev); + int work = 0; + int i; + + UNUSED_PARAM(Interrupt); + + writel(priv->eq_table.clr_mask, priv->eq_table.clr_int); + + for (i = 0; i < MLX4_NUM_EQ; ++i) { + if ( next_eqe_sw(&priv->eq_table.eq[i]) ) { + work = 1; + /* another interrupt may happen instantly after writel above. + If it comes to another processor, mlx4_interrupt will be called + and try to schedule the same DPC. 
So we protect KeInsertQueueDpc + from that race */ + + while(InterlockedCompareExchange(&dev->pdev->dpc_lock, 1, 0)); + + KeInsertQueueDpc(&priv->eq_table.eq[i].dpc, NULL, NULL); + InterlockedCompareExchange(&dev->pdev->dpc_lock, 0, 1); + } + else { + /* re-arm the EQ for a case when interrupt comes before EQE + and we didn't scheduled the DPC */ + eq_set_ci(&priv->eq_table.eq[i], 1); + } + } + + return (BOOLEAN)work; +} + +#ifdef CONFIG_PCI_MSI + +/* not ported yet */ +static irqreturn_t mlx4_msi_x_interrupt(int irq, void *eq_ptr) +{ + struct mlx4_eq *eq = eq_ptr; + struct mlx4_dev *dev = eq->dev; + + mlx4_eq_int(dev, eq); + + /* MSI-X vectors always belong to us */ + return IRQ_HANDLED; +} + +#endif + +static int mlx4_MAP_EQ(struct mlx4_dev *dev, u64 event_mask, int unmap, + int eq_num) +{ + return mlx4_cmd(dev, event_mask, (unmap << 31) | eq_num, + 0, MLX4_CMD_MAP_EQ, MLX4_CMD_TIME_CLASS_B); +} + +static int mlx4_SW2HW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, + int eq_num) +{ + return mlx4_cmd(dev, mailbox->dma.da, eq_num, 0, MLX4_CMD_SW2HW_EQ, + MLX4_CMD_TIME_CLASS_A); +} + +static int mlx4_HW2SW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, + int eq_num) +{ + return mlx4_cmd_box(dev, 0, mailbox->dma.da, eq_num, 0, MLX4_CMD_HW2SW_EQ, + MLX4_CMD_TIME_CLASS_A); +} + +static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int index; + + index = eq->eqn / 4 - dev->caps.reserved_eqs / 4; + + if (!priv->eq_table.uar_map[index]) { + priv->eq_table.uar_map[index] = + ioremap(pci_resource_start(dev->pdev, 2) + + ((eq->eqn / 4) << PAGE_SHIFT), + PAGE_SIZE); + if (!priv->eq_table.uar_map[index]) { + mlx4_err(dev, "Couldn't map EQ doorbell for EQN 0x%06x\n", + eq->eqn); + return NULL; + } + } + + return priv->eq_table.uar_map[index] + 0x800 + 8 * (eq->eqn % 4); +} + +static int mlx4_create_eq(struct mlx4_dev *dev, int nent, + u8 intr, struct mlx4_eq *eq) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_eq_context *eq_context; + int npages; + u64 *dma_list = NULL; + dma_addr_t t; + u64 mtt_addr; + int err = -ENOMEM; + int i; + + eq->dev = dev; + eq->nent = roundup_pow_of_two(max(nent, 2)); + npages = (int)(NEXT_PAGE_ALIGN(eq->nent * MLX4_EQ_ENTRY_SIZE) / PAGE_SIZE); + + eq->page_list = kmalloc(npages * sizeof *eq->page_list, + GFP_KERNEL); + if (!eq->page_list) + goto err_out; + + for (i = 0; i < npages; ++i) + eq->page_list[i].buf = NULL; + + dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); + if (!dma_list) + goto err_out_free; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + goto err_out_free; + eq_context = (struct mlx4_eq_context *)mailbox->buf; + + for (i = 0; i < npages; ++i) { + eq->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev, + PAGE_SIZE, &t, GFP_KERNEL); + if (!eq->page_list[i].buf) + goto err_out_free_pages; + + dma_list[i] = t.da; + eq->page_list[i].map = t; + + memset(eq->page_list[i].buf, 0, PAGE_SIZE); + } + + eq->eqn = mlx4_bitmap_alloc(&priv->eq_table.bitmap); + if (eq->eqn == -1) + goto err_out_free_pages; + + eq->doorbell = mlx4_get_eq_uar(dev, eq); + if (!eq->doorbell) { + err = -ENOMEM; + goto err_out_free_eq; + } + + err = mlx4_mtt_init(dev, npages, PAGE_SHIFT, &eq->mtt); + if (err) + goto err_out_free_eq; + + err = mlx4_write_mtt(dev, &eq->mtt, 0, npages, dma_list); + if (err) + goto err_out_free_mtt; + + memset(eq_context, 0, sizeof *eq_context); + eq_context->flags = 
cpu_to_be32(MLX4_EQ_STATUS_OK | + MLX4_EQ_STATE_ARMED); + eq_context->log_eq_size = (u8)ilog2(eq->nent); + eq_context->intr = intr; + eq_context->log_page_size = PAGE_SHIFT - MLX4_ICM_PAGE_SHIFT; + + mtt_addr = mlx4_mtt_addr(dev, &eq->mtt); + eq_context->mtt_base_addr_h = (u8)(mtt_addr >> 32); + eq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff); + + err = mlx4_SW2HW_EQ(dev, mailbox, eq->eqn); + if (err) { + mlx4_warn(dev, "SW2HW_EQ failed (%d)\n", err); + goto err_out_free_mtt; + } + + kfree(dma_list); + mlx4_free_cmd_mailbox(dev, mailbox); + + eq->cons_index = 0; + + return err; + +err_out_free_mtt: + mlx4_mtt_cleanup(dev, &eq->mtt); + +err_out_free_eq: + mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn); + +err_out_free_pages: + for (i = 0; i < npages; ++i) + if (eq->page_list[i].buf) + dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + eq->page_list[i].buf, + eq->page_list[i].map); + + mlx4_free_cmd_mailbox(dev, mailbox); + +err_out_free: + kfree(eq->page_list); + kfree(dma_list); + +err_out: + return err; +} + +static void mlx4_free_eq(struct mlx4_dev *dev, + struct mlx4_eq *eq) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_cmd_mailbox *mailbox; + int err; + int npages = (int)(NEXT_PAGE_ALIGN(MLX4_EQ_ENTRY_SIZE * eq->nent) / PAGE_SIZE); + int i; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return; + + err = mlx4_HW2SW_EQ(dev, mailbox, eq->eqn); + if (err) + mlx4_warn(dev, "HW2SW_EQ failed (%d)\n", err); + +#if 0 + { + mlx4_dbg(dev, "Dumping EQ context %02x:\n", eq->eqn); + for (i = 0; i < sizeof (struct mlx4_eq_context) / 4; ++i) { + if (i % 4 == 0) + printk("[%02x] ", i * 4); + printk(" %08x", be32_to_cpup(mailbox->buf + i * 4)); + if ((i + 1) % 4 == 0) + printk("\n"); + } + } +#endif + + mlx4_mtt_cleanup(dev, &eq->mtt); + for (i = 0; i < npages; ++i) + pci_free_consistent(dev->pdev, PAGE_SIZE, + eq->page_list[i].buf, + eq->page_list[i].map); + + kfree(eq->page_list); + mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn); + mlx4_free_cmd_mailbox(dev, mailbox); +} + +static void mlx4_free_irqs(struct mlx4_dev *dev) +{ + struct mlx4_eq_table *eq_table = &mlx4_priv(dev)->eq_table; + + if (eq_table->have_irq) + free_irq(dev->pdev->int_obj); + +#ifdef CONFIG_PCI_MSI + { + int i; + for (i = 0; i < MLX4_NUM_EQ; ++i) + if (eq_table->eq[i].have_irq) + free_irq(eq_table->eq[i].irq, eq_table->eq + i); + } +#endif +} + +static int mlx4_map_clr_int(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + priv->clr_base = ioremap(pci_resource_start(dev->pdev, priv->fw.clr_int_bar) + + priv->fw.clr_int_base, MLX4_CLR_INT_SIZE); + if (!priv->clr_base) { + mlx4_err(dev, "Couldn't map interrupt clear register, aborting.\n"); + return -ENOMEM; + } + + return 0; +} + +static void mlx4_unmap_clr_int(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + iounmap(priv->clr_base, MLX4_CLR_INT_SIZE); +} + +int mlx4_map_eq_icm(struct mlx4_dev *dev, u64 icm_virt) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int ret; + + /* + * We assume that mapping one page is enough for the whole EQ + * context table. This is fine with all current HCAs, because + * we only use 32 EQs and each EQ uses 64 bytes of context + * memory, or 1 KB total. 
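+ * (Strictly, 32 EQs at 64 bytes of context each come to 2 KB, which still
+ * fits comfortably within a single page.)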
+ */ + priv->eq_table.icm_virt = icm_virt; + priv->eq_table.icm_page = alloc_page(dev->pdev, GFP_HIGHUSER); + if (!priv->eq_table.icm_page.da) + return -ENOMEM; + priv->eq_table.icm_dma = pci_map_page(dev->pdev, priv->eq_table.icm_page, 0, + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + if (pci_dma_mapping_error(priv->eq_table.icm_dma)) { + __free_page(dev->pdev, priv->eq_table.icm_page); + return -ENOMEM; + } + + ret = mlx4_MAP_ICM_page(dev, priv->eq_table.icm_dma.da, icm_virt); + if (ret) { + pci_unmap_page(dev->pdev, priv->eq_table.icm_dma, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); + __free_page(dev->pdev, priv->eq_table.icm_page); + } + mlx4_dbg(dev,"mlx4_MAP_ICM_page: dma %#I64x, icm_virt %#I64x\n", priv->eq_table.icm_dma.da, icm_virt); + + return ret; +} + +void mlx4_unmap_eq_icm(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + mlx4_UNMAP_ICM(dev, priv->eq_table.icm_virt, 1); + pci_unmap_page(dev->pdev, priv->eq_table.icm_dma, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); + __free_page(dev->pdev, priv->eq_table.icm_page); +} + +int mlx4_init_eq_table(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int err; + int i; + + err = mlx4_bitmap_init(&priv->eq_table.bitmap, dev->caps.num_eqs, + dev->caps.num_eqs - 1, dev->caps.reserved_eqs); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(priv->eq_table.uar_map); ++i) + priv->eq_table.uar_map[i] = NULL; + + err = mlx4_map_clr_int(dev); + if (err) + goto err_out_free; + + priv->eq_table.clr_mask = + swab32(1 << (priv->eq_table.inta_pin & 31)); + priv->eq_table.clr_int = priv->clr_base + + (priv->eq_table.inta_pin < 32 ? 4 : 0); + + err = mlx4_create_eq(dev, dev->caps.num_cqs + MLX4_NUM_SPARE_EQE, + (dev->flags & MLX4_FLAG_MSI_X) ? MLX4_EQ_COMP : 0, + &priv->eq_table.eq[MLX4_EQ_COMP]); + if (err) + goto err_out_unmap; + + err = mlx4_create_eq(dev, MLX4_NUM_ASYNC_EQE + MLX4_NUM_SPARE_EQE, + (dev->flags & MLX4_FLAG_MSI_X) ? 
MLX4_EQ_ASYNC : 0, + &priv->eq_table.eq[MLX4_EQ_ASYNC]); + if (err) + goto err_out_comp; + +#ifdef CONFIG_PCI_MSI + if (dev->flags & MLX4_FLAG_MSI_X) { + static const char *eq_name[] = { + [MLX4_EQ_COMP] = DRV_NAME " (comp)", + [MLX4_EQ_ASYNC] = DRV_NAME " (async)" + }; + + for (i = 0; i < MLX4_NUM_EQ; ++i) { + err = request_irq(priv->eq_table.eq[i].irq, + mlx4_msi_x_interrupt, + priv->eq_table.eq + i, eq_name[i], ); + if (err) + goto err_out_async; + + priv->eq_table.eq[i].have_irq = 1; + } + + } else +#endif + { +#ifdef USE_WDM_INTERRUPTS + err = request_irq( dev, + dev->pdev->int_info.u.Interrupt.Vector, + mlx4_interrupt, dev, + mlx4_dpc, &priv->eq_table.eq[0], + &dev->pdev->int_obj ); + if (err) + goto err_out_async; +#else + dev->pdev->dpc_lock = 0; + for (i = 0; i < MLX4_NUM_EQ; ++i) { + struct mlx4_eq * eq = &priv->eq_table.eq[i]; + spin_lock_init( &eq->lock ); + eq->isr = mlx4_interrupt; + eq->ctx = dev; + KeInitializeDpc( &eq->dpc, mlx4_dpc, eq); + eq->eq_ix = i; + } +#endif + priv->eq_table.have_irq = 1; + } + + err = mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 0, + priv->eq_table.eq[MLX4_EQ_ASYNC].eqn); + if (err) + mlx4_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n", + priv->eq_table.eq[MLX4_EQ_ASYNC].eqn, err); + + for (i = 0; i < MLX4_NUM_EQ; ++i) + eq_set_ci(&priv->eq_table.eq[i], 1); + + return 0; + +#ifdef USE_WDM_INTERRUPTS +err_out_async: + mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_ASYNC]); +#endif + +err_out_comp: + mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_COMP]); + +err_out_unmap: + mlx4_unmap_clr_int(dev); + mlx4_free_irqs(dev); + +err_out_free: + mlx4_bitmap_cleanup(&priv->eq_table.bitmap); + return err; +} + +void mlx4_cleanup_eq_table(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int i; + + mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 1, + priv->eq_table.eq[MLX4_EQ_ASYNC].eqn); + + mlx4_free_irqs(dev); + + for (i = 0; i < MLX4_NUM_EQ; ++i) + mlx4_free_eq(dev, &priv->eq_table.eq[i]); + + mlx4_unmap_clr_int(dev); + + for (i = 0; i < ARRAY_SIZE(priv->eq_table.uar_map); ++i) + if (priv->eq_table.uar_map[i]) + iounmap(priv->eq_table.uar_map[i],PAGE_SIZE); + + mlx4_bitmap_cleanup(&priv->eq_table.bitmap); +} diff --git a/branches/ConnectX/hw/mlx4/kernel/net/fw.c b/branches/ConnectX/hw/mlx4/kernel/net/fw.c new file mode 100644 index 00000000..1596c03e --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/net/fw.c @@ -0,0 +1,841 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "fw.h" +#include "cmd.h" +#include "icm.h" + +enum { + MLX4_COMMAND_INTERFACE_MIN_REV = 2, + MLX4_COMMAND_INTERFACE_MAX_REV = 3, + MLX4_COMMAND_INTERFACE_NEW_PORT_CMDS = 3, +}; + +#define MLX4_GET(dest, source, offset) \ + { \ + void *__p = (char *) (source) + (offset); \ + void *__d = &(dest); \ + switch (sizeof (dest)) { \ + case 1: *(u8 *) __d = *(u8 *) __p; break; \ + case 2: *(__be16 *) __d = be16_to_cpup(__p); break; \ + case 4: *(__be32 *) __d = be32_to_cpup(__p); break; \ + case 8: *(__be64 *) __d = be64_to_cpup(__p); break; \ + default: ASSERTMSG("Incorrect dest field\n", !__p); \ + } \ + } + +#define MLX4_PUT(dest, source, offset) \ + { \ + void *__d = ((char *) (dest) + (offset)); \ + switch (sizeof(source)) { \ + case 1: *(u8 *) __d = (u8)(source); break; \ + case 2: *(__be16 *) __d = cpu_to_be16((u16)(source)); break; \ + case 4: *(__be32 *) __d = cpu_to_be32((u32)(source)); break; \ + case 8: *(__be64 *) __d = cpu_to_be64((u64)(source)); break; \ + default: ASSERTMSG("Incorrect dest field\n", !__d); \ + } \ + } + +static void dump_dev_cap_flags(struct mlx4_dev *dev, u32 flags) +{ + static char *fname[26]; + static int filled = 0; + int i; + + if (!filled) + { + memset( fname, 0, sizeof(fname) ); + fname[0] = "RC transport"; + fname[1] = "UC transport"; + fname[2] = "UD transport"; + fname[3] = "XRC transport"; + fname[4] = "reliable multicast"; + fname[5] = "FCoIB support"; + fname[6] = "SRQ support"; + fname[7] = "IPoIB checksum offload"; + fname[8] = "P_Key violation counter"; + fname[9] = "Q_Key violation counter"; + fname[10] = "VMM"; + fname[16] = "MW support"; + fname[17] = "APM support"; + fname[18] = "Atomic ops support"; + fname[19] = "Raw multicast support"; + fname[20] = "Address vector port checking support"; + fname[21] = "UD multicast support"; + fname[24] = "Demand paging support"; + fname[25] = "Router support"; + } + + mlx4_dbg(dev, "DEV_CAP flags:\n"); + for (i = 0; i < ARRAY_SIZE(fname); ++i) + if (fname[i] && (flags & (1 << i))) + mlx4_dbg(dev, " %s\n", fname[i]); +} + +int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) +{ + struct mlx4_cmd_mailbox *mailbox; + u32 *outbox; + u8 field; + u16 size; + u16 stat_rate; + int err; + int i; + +#define QUERY_DEV_CAP_OUT_SIZE 0x100 +#define QUERY_DEV_CAP_MAX_SRQ_SZ_OFFSET 0x10 +#define QUERY_DEV_CAP_MAX_QP_SZ_OFFSET 0x11 +#define QUERY_DEV_CAP_RSVD_QP_OFFSET 0x12 +#define QUERY_DEV_CAP_MAX_QP_OFFSET 0x13 +#define QUERY_DEV_CAP_RSVD_SRQ_OFFSET 0x14 +#define QUERY_DEV_CAP_MAX_SRQ_OFFSET 0x15 +#define QUERY_DEV_CAP_RSVD_EEC_OFFSET 0x16 +#define QUERY_DEV_CAP_MAX_EEC_OFFSET 0x17 +#define QUERY_DEV_CAP_MAX_CQ_SZ_OFFSET 0x19 +#define QUERY_DEV_CAP_RSVD_CQ_OFFSET 0x1a +#define QUERY_DEV_CAP_MAX_CQ_OFFSET 0x1b +#define QUERY_DEV_CAP_MAX_MPT_OFFSET 0x1d +#define QUERY_DEV_CAP_RSVD_EQ_OFFSET 0x1e +#define QUERY_DEV_CAP_MAX_EQ_OFFSET 0x1f +#define QUERY_DEV_CAP_RSVD_MTT_OFFSET 0x20 +#define QUERY_DEV_CAP_MAX_MRW_SZ_OFFSET 0x21 +#define QUERY_DEV_CAP_RSVD_MRW_OFFSET 0x22 +#define 
QUERY_DEV_CAP_MAX_MTT_SEG_OFFSET 0x23 +#define QUERY_DEV_CAP_MAX_AV_OFFSET 0x27 +#define QUERY_DEV_CAP_MAX_REQ_QP_OFFSET 0x29 +#define QUERY_DEV_CAP_MAX_RES_QP_OFFSET 0x2b +#define QUERY_DEV_CAP_MAX_RDMA_OFFSET 0x2f +#define QUERY_DEV_CAP_RSZ_SRQ_OFFSET 0x33 +#define QUERY_DEV_CAP_ACK_DELAY_OFFSET 0x35 +#define QUERY_DEV_CAP_MTU_WIDTH_OFFSET 0x36 +#define QUERY_DEV_CAP_VL_PORT_OFFSET 0x37 +#define QUERY_DEV_CAP_MAX_MSG_SZ_OFFSET 0x38 +#define QUERY_DEV_CAP_MAX_GID_OFFSET 0x3b +#define QUERY_DEV_CAP_RATE_SUPPORT_OFFSET 0x3c +#define QUERY_DEV_CAP_MAX_PKEY_OFFSET 0x3f +#define QUERY_DEV_CAP_FLAGS_OFFSET 0x44 +#define QUERY_DEV_CAP_RSVD_UAR_OFFSET 0x48 +#define QUERY_DEV_CAP_UAR_SZ_OFFSET 0x49 +#define QUERY_DEV_CAP_PAGE_SZ_OFFSET 0x4b +#define QUERY_DEV_CAP_BF_OFFSET 0x4c +#define QUERY_DEV_CAP_LOG_BF_REG_SZ_OFFSET 0x4d +#define QUERY_DEV_CAP_LOG_MAX_BF_REGS_PER_PAGE_OFFSET 0x4e +#define QUERY_DEV_CAP_LOG_MAX_BF_PAGES_OFFSET 0x4f +#define QUERY_DEV_CAP_MAX_SG_SQ_OFFSET 0x51 +#define QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET 0x52 +#define QUERY_DEV_CAP_MAX_SG_RQ_OFFSET 0x55 +#define QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET 0x56 +#define QUERY_DEV_CAP_MAX_QP_MCG_OFFSET 0x61 +#define QUERY_DEV_CAP_RSVD_MCG_OFFSET 0x62 +#define QUERY_DEV_CAP_MAX_MCG_OFFSET 0x63 +#define QUERY_DEV_CAP_RSVD_PD_OFFSET 0x64 +#define QUERY_DEV_CAP_MAX_PD_OFFSET 0x65 +#define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET 0x80 +#define QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET 0x82 +#define QUERY_DEV_CAP_AUX_ENTRY_SZ_OFFSET 0x84 +#define QUERY_DEV_CAP_ALTC_ENTRY_SZ_OFFSET 0x86 +#define QUERY_DEV_CAP_EQC_ENTRY_SZ_OFFSET 0x88 +#define QUERY_DEV_CAP_CQC_ENTRY_SZ_OFFSET 0x8a +#define QUERY_DEV_CAP_SRQ_ENTRY_SZ_OFFSET 0x8c +#define QUERY_DEV_CAP_C_MPT_ENTRY_SZ_OFFSET 0x8e +#define QUERY_DEV_CAP_MTT_ENTRY_SZ_OFFSET 0x90 +#define QUERY_DEV_CAP_D_MPT_ENTRY_SZ_OFFSET 0x92 +#define QUERY_DEV_CAP_BMME_FLAGS_OFFSET 0x97 +#define QUERY_DEV_CAP_RSVD_LKEY_OFFSET 0x98 +#define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0 + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; + + err = mlx4_cmd_box(dev, 0, mailbox->dma.da, 0, 0, MLX4_CMD_QUERY_DEV_CAP, + MLX4_CMD_TIME_CLASS_A); + if (err) + goto out; + + MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_QP_OFFSET); + dev_cap->reserved_qps = 1 << (field & 0xf); + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_OFFSET); + dev_cap->max_qps = 1 << (field & 0x1f); + MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_SRQ_OFFSET); + dev_cap->reserved_srqs = 1 << (field >> 4); + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SRQ_OFFSET); + dev_cap->max_srqs = 1 << (field & 0x1f); + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_CQ_SZ_OFFSET); + dev_cap->max_cq_sz = 1 << field; + MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_CQ_OFFSET); + dev_cap->reserved_cqs = 1 << (field & 0xf); + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_CQ_OFFSET); + dev_cap->max_cqs = 1 << (field & 0x1f); + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MPT_OFFSET); + dev_cap->max_mpts = 1 << (field & 0x3f); + MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_EQ_OFFSET); + dev_cap->reserved_eqs = 1 << (field & 0xf); + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_EQ_OFFSET); + dev_cap->max_eqs = 1 << (field & 0xf); + MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MTT_OFFSET); + dev_cap->reserved_mtts = 1 << (field >> 4); + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MRW_SZ_OFFSET); + dev_cap->max_mrw_sz = 1 << field; + MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MRW_OFFSET); + dev_cap->reserved_mrws = 1 << (field & 0xf); + MLX4_GET(field, outbox, 
QUERY_DEV_CAP_MAX_MTT_SEG_OFFSET); + dev_cap->max_mtt_seg = 1 << (field & 0x3f); + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_REQ_QP_OFFSET); + dev_cap->max_requester_per_qp = 1 << (field & 0x3f); + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RES_QP_OFFSET); + dev_cap->max_responder_per_qp = 1 << (field & 0x3f); + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RDMA_OFFSET); + dev_cap->max_rdma_global = 1 << (field & 0x3f); + MLX4_GET(field, outbox, QUERY_DEV_CAP_ACK_DELAY_OFFSET); + dev_cap->local_ca_ack_delay = field & 0x1f; + MLX4_GET(field, outbox, QUERY_DEV_CAP_VL_PORT_OFFSET); + dev_cap->num_ports = field & 0xf; + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MSG_SZ_OFFSET); + dev_cap->max_msg_sz = 1 << (field & 0x1f); + MLX4_GET(stat_rate, outbox, QUERY_DEV_CAP_RATE_SUPPORT_OFFSET); + dev_cap->stat_rate_support = stat_rate; + MLX4_GET(dev_cap->flags, outbox, QUERY_DEV_CAP_FLAGS_OFFSET); + MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_UAR_OFFSET); + dev_cap->reserved_uars = field >> 4; + MLX4_GET(field, outbox, QUERY_DEV_CAP_UAR_SZ_OFFSET); + dev_cap->uar_size = 1 << ((field & 0x3f) + 20); + MLX4_GET(field, outbox, QUERY_DEV_CAP_PAGE_SZ_OFFSET); + dev_cap->min_page_sz = 1 << field; + + MLX4_GET(field, outbox, QUERY_DEV_CAP_BF_OFFSET); + if (field & 0x80) { + MLX4_GET(field, outbox, QUERY_DEV_CAP_LOG_BF_REG_SZ_OFFSET); + dev_cap->bf_reg_size = 1 << (field & 0x1f); + MLX4_GET(field, outbox, QUERY_DEV_CAP_LOG_MAX_BF_REGS_PER_PAGE_OFFSET); + dev_cap->bf_regs_per_page = 1 << (field & 0x3f); + mlx4_dbg(dev, "BlueFlame available (reg size %d, regs/page %d)\n", + dev_cap->bf_reg_size, dev_cap->bf_regs_per_page); + } else { + dev_cap->bf_reg_size = 0; + mlx4_dbg(dev, "BlueFlame not available\n"); + } + + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SG_SQ_OFFSET); + dev_cap->max_sq_sg = field; + MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET); + dev_cap->max_sq_desc_sz = size; + + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_MCG_OFFSET); + dev_cap->max_qp_per_mcg = 1 << field; + MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MCG_OFFSET); + dev_cap->reserved_mgms = field & 0xf; + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MCG_OFFSET); + dev_cap->max_mcgs = 1 << field; + MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_PD_OFFSET); + dev_cap->reserved_pds = field >> 4; + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_PD_OFFSET); + dev_cap->max_pds = 1 << (field & 0x3f); + + MLX4_GET(size, outbox, QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET); + dev_cap->rdmarc_entry_sz = size; + MLX4_GET(size, outbox, QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET); + dev_cap->qpc_entry_sz = size; + MLX4_GET(size, outbox, QUERY_DEV_CAP_AUX_ENTRY_SZ_OFFSET); + dev_cap->aux_entry_sz = size; + MLX4_GET(size, outbox, QUERY_DEV_CAP_ALTC_ENTRY_SZ_OFFSET); + dev_cap->altc_entry_sz = size; + MLX4_GET(size, outbox, QUERY_DEV_CAP_EQC_ENTRY_SZ_OFFSET); + dev_cap->eqc_entry_sz = size; + MLX4_GET(size, outbox, QUERY_DEV_CAP_CQC_ENTRY_SZ_OFFSET); + dev_cap->cqc_entry_sz = size; + MLX4_GET(size, outbox, QUERY_DEV_CAP_SRQ_ENTRY_SZ_OFFSET); + dev_cap->srq_entry_sz = size; + MLX4_GET(size, outbox, QUERY_DEV_CAP_C_MPT_ENTRY_SZ_OFFSET); + dev_cap->cmpt_entry_sz = size; + MLX4_GET(size, outbox, QUERY_DEV_CAP_MTT_ENTRY_SZ_OFFSET); + dev_cap->mtt_entry_sz = size; + MLX4_GET(size, outbox, QUERY_DEV_CAP_D_MPT_ENTRY_SZ_OFFSET); + dev_cap->dmpt_entry_sz = size; + + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SRQ_SZ_OFFSET); + dev_cap->max_srq_sz = 1 << field; + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_SZ_OFFSET); + dev_cap->max_qp_sz = 1 << field; + MLX4_GET(field, outbox, 
QUERY_DEV_CAP_RSZ_SRQ_OFFSET); + dev_cap->resize_srq = field & 1; + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SG_RQ_OFFSET); + dev_cap->max_rq_sg = field; + MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET); + dev_cap->max_rq_desc_sz = size; + + MLX4_GET(dev_cap->bmme_flags, outbox, + QUERY_DEV_CAP_BMME_FLAGS_OFFSET); + MLX4_GET(dev_cap->reserved_lkey, outbox, + QUERY_DEV_CAP_RSVD_LKEY_OFFSET); + MLX4_GET(dev_cap->max_icm_sz, outbox, + QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET); + + if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) { + for (i = 1; i <= dev_cap->num_ports; ++i) { + MLX4_GET(field, outbox, QUERY_DEV_CAP_VL_PORT_OFFSET); + dev_cap->max_vl[i] = field >> 4; + MLX4_GET(field, outbox, QUERY_DEV_CAP_MTU_WIDTH_OFFSET); + dev_cap->max_mtu[i] = field >> 4; + dev_cap->max_port_width[i] = field & 0xf; + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_GID_OFFSET); + dev_cap->max_gids[i] = 1 << (field & 0xf); + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_PKEY_OFFSET); + dev_cap->max_pkeys[i] = 1 << (field & 0xf); + } + } else { +#define QUERY_PORT_MTU_OFFSET 0x01 +#define QUERY_PORT_WIDTH_OFFSET 0x06 +#define QUERY_PORT_MAX_GID_PKEY_OFFSET 0x07 +#define QUERY_PORT_MAX_VL_OFFSET 0x0b + + for (i = 1; i <= dev_cap->num_ports; ++i) { + err = mlx4_cmd_box(dev, 0, mailbox->dma.da, i, 0, MLX4_CMD_QUERY_PORT, + MLX4_CMD_TIME_CLASS_B); + if (err) + goto out; + + MLX4_GET(field, outbox, QUERY_PORT_MTU_OFFSET); + dev_cap->max_mtu[i] = field & 0xf; + MLX4_GET(field, outbox, QUERY_PORT_WIDTH_OFFSET); + dev_cap->max_port_width[i] = field & 0xf; + MLX4_GET(field, outbox, QUERY_PORT_MAX_GID_PKEY_OFFSET); + dev_cap->max_gids[i] = 1 << (field >> 4); + dev_cap->max_pkeys[i] = 1 << (field & 0xf); + MLX4_GET(field, outbox, QUERY_PORT_MAX_VL_OFFSET); + dev_cap->max_vl[i] = field & 0xf; + } + } + + if (dev_cap->bmme_flags & 1) + mlx4_dbg(dev, "Base MM extensions: yes " + "(flags %d, rsvd L_Key %08x)\n", + dev_cap->bmme_flags, dev_cap->reserved_lkey); + else + mlx4_dbg(dev, "Base MM extensions: no\n"); + + /* + * Each UAR has 4 EQ doorbells; so if a UAR is reserved, then + * we can't use any EQs whose doorbell falls on that page, + * even if the EQ itself isn't reserved. 
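+ * Hence reserved_eqs is rounded up below to four EQs per reserved UAR.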
+ */ + dev_cap->reserved_eqs = max(dev_cap->reserved_uars * 4, + dev_cap->reserved_eqs); + + mlx4_dbg(dev, "Max ICM size %lld MB\n", + (unsigned long long) dev_cap->max_icm_sz >> 20); + mlx4_dbg(dev, "Max QPs: %d, reserved QPs: %d, entry size: %d\n", + dev_cap->max_qps, dev_cap->reserved_qps, dev_cap->qpc_entry_sz); + mlx4_dbg(dev, "Max SRQs: %d, reserved SRQs: %d, entry size: %d\n", + dev_cap->max_srqs, dev_cap->reserved_srqs, dev_cap->srq_entry_sz); + mlx4_dbg(dev, "Max CQs: %d, reserved CQs: %d, entry size: %d\n", + dev_cap->max_cqs, dev_cap->reserved_cqs, dev_cap->cqc_entry_sz); + mlx4_dbg(dev, "Max EQs: %d, reserved EQs: %d, entry size: %d\n", + dev_cap->max_eqs, dev_cap->reserved_eqs, dev_cap->eqc_entry_sz); + mlx4_dbg(dev, "reserved MPTs: %d, reserved MTTs: %d\n", + dev_cap->reserved_mrws, dev_cap->reserved_mtts); + mlx4_dbg(dev, "Max PDs: %d, reserved PDs: %d, reserved UARs: %d\n", + dev_cap->max_pds, dev_cap->reserved_pds, dev_cap->reserved_uars); + mlx4_dbg(dev, "Max QP/MCG: %d, reserved MGMs: %d\n", + dev_cap->max_pds, dev_cap->reserved_mgms); + mlx4_dbg(dev, "Max CQEs: %d, max WQEs: %d, max SRQ WQEs: %d\n", + dev_cap->max_cq_sz, dev_cap->max_qp_sz, dev_cap->max_srq_sz); + mlx4_dbg(dev, "Local CA ACK delay: %d, max MTU: %d, port width cap: %d\n", + dev_cap->local_ca_ack_delay, 128 << dev_cap->max_mtu[1], + dev_cap->max_port_width[1]); + mlx4_dbg(dev, "Max SQ desc size: %d, max SQ S/G: %d\n", + dev_cap->max_sq_desc_sz, dev_cap->max_sq_sg); + mlx4_dbg(dev, "Max RQ desc size: %d, max RQ S/G: %d\n", + dev_cap->max_rq_desc_sz, dev_cap->max_rq_sg); + + dump_dev_cap_flags(dev, dev_cap->flags); + +out: + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + +int mlx4_map_cmd(struct mlx4_dev *dev, u16 op, struct mlx4_icm *icm, u64 virt) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_icm_iter iter; + __be64 *pages; + int lg; + int nent = 0; + unsigned int i; + int err = 0; + int ts = 0, tc = 0; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + memset(mailbox->buf, 0, MLX4_MAILBOX_SIZE); + pages = mailbox->buf; + + for (mlx4_icm_first(icm, &iter); + !mlx4_icm_last(&iter); + mlx4_icm_next(&iter)) { + /* + * We have to pass pages that are aligned to their + * size, so find the least significant 1 in the + * address or size and use that as our log2 size. 
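+ * For example, a chunk whose DMA address is 0x1c0000 and whose size is
+ * 0x40000 gives ffs(0x1c0000 | 0x40000) - 1 = 18, so the area is passed
+ * to the firmware as 256 KB (1 << 18) pages.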
+ */ + unsigned long end = (unsigned long)(mlx4_icm_addr(&iter).da | mlx4_icm_size(&iter)); + lg = ffs(end) - 1; + if (lg < MLX4_ICM_PAGE_SHIFT) { + mlx4_warn(dev, "Got FW area not aligned to %d (%llx/%lx).\n", + MLX4_ICM_PAGE_SIZE, + (unsigned long long) mlx4_icm_addr(&iter).da, + mlx4_icm_size(&iter)); + err = -EINVAL; + goto out; + } + + for (i = 0; i < mlx4_icm_size(&iter) >> lg; ++i) { + if (virt != -1) { + pages[nent * 2] = cpu_to_be64(virt); + virt += 1I64 << lg; + } + + pages[nent * 2 + 1] = + cpu_to_be64((mlx4_icm_addr(&iter).da + (i << lg)) | + (lg - MLX4_ICM_PAGE_SHIFT)); + ts += 1 << (lg - 10); + ++tc; + + if (++nent == MLX4_MAILBOX_SIZE / 16) { + err = mlx4_cmd(dev, mailbox->dma.da, nent, 0, op, + MLX4_CMD_TIME_CLASS_B); + if (err) + goto out; + nent = 0; + } + } + } + + if (nent) + err = mlx4_cmd(dev, mailbox->dma.da, nent, 0, op, MLX4_CMD_TIME_CLASS_B); + if (err) + goto out; + + switch (op) { + case MLX4_CMD_MAP_FA: + mlx4_dbg(dev, "Mapped %d chunks/%d KB for FW.\n", tc, ts); + break; + case MLX4_CMD_MAP_ICM_AUX: + mlx4_dbg(dev, "Mapped %d chunks/%d KB for ICM aux.\n", tc, ts); + break; + case MLX4_CMD_MAP_ICM: + mlx4_dbg(dev, "Mapped %d chunks/%d KB at %llx for ICM.\n", + tc, ts, (unsigned long long) virt - (ts << 10)); + break; + } + +out: + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + +int mlx4_MAP_FA(struct mlx4_dev *dev, struct mlx4_icm *icm) +{ + return mlx4_map_cmd(dev, MLX4_CMD_MAP_FA, icm, (u64)-1); +} + +int mlx4_UNMAP_FA(struct mlx4_dev *dev) +{ + return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_UNMAP_FA, MLX4_CMD_TIME_CLASS_B); +} + + +int mlx4_RUN_FW(struct mlx4_dev *dev) +{ + return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_RUN_FW, MLX4_CMD_TIME_CLASS_A); +} + +int mlx4_QUERY_FW(struct mlx4_dev *dev) +{ + struct mlx4_fw *fw = &mlx4_priv(dev)->fw; + struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd; + struct mlx4_cmd_mailbox *mailbox; + u32 *outbox; + int err = 0; + u64 fw_ver; + u16 cmd_if_rev; + u8 lg; + +#define QUERY_FW_OUT_SIZE 0x100 +#define QUERY_FW_VER_OFFSET 0x00 +#define QUERY_FW_CMD_IF_REV_OFFSET 0x0a +#define QUERY_FW_MAX_CMD_OFFSET 0x0f +#define QUERY_FW_ERR_START_OFFSET 0x30 +#define QUERY_FW_ERR_SIZE_OFFSET 0x38 +#define QUERY_FW_ERR_BAR_OFFSET 0x3c + +#define QUERY_FW_SIZE_OFFSET 0x00 +#define QUERY_FW_CLR_INT_BASE_OFFSET 0x20 +#define QUERY_FW_CLR_INT_BAR_OFFSET 0x28 + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; + + err = mlx4_cmd_box(dev, 0, mailbox->dma.da, 0, 0, MLX4_CMD_QUERY_FW, + MLX4_CMD_TIME_CLASS_A); + if (err) + goto out; + + MLX4_GET(fw_ver, outbox, QUERY_FW_VER_OFFSET); + /* + * FW subminor version is at more significant bits than minor + * version, so swap here. 
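+ * For example, a raw value of 0x000201900005 (major 2, subminor 0x190,
+ * minor 5) is rewritten as 0x000200050190 and reported as 2.5.400.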
+ */ + dev->caps.fw_ver = (fw_ver & 0xffff00000000ull) | + ((fw_ver & 0xffff0000ull) >> 16) | + ((fw_ver & 0x0000ffffull) << 16); + + MLX4_GET(cmd_if_rev, outbox, QUERY_FW_CMD_IF_REV_OFFSET); + if (cmd_if_rev < MLX4_COMMAND_INTERFACE_MIN_REV || + cmd_if_rev > MLX4_COMMAND_INTERFACE_MAX_REV) { + mlx4_err(dev, "Installed FW has unsupported " + "command interface revision %d.\n", + cmd_if_rev); + mlx4_err(dev, "(Installed FW version is %d.%d.%03d)\n", + (int) (dev->caps.fw_ver >> 32), + (int) (dev->caps.fw_ver >> 16) & 0xffff, + (int) dev->caps.fw_ver & 0xffff); + mlx4_err(dev, "This driver version supports only revisions %d to %d.\n", + MLX4_COMMAND_INTERFACE_MIN_REV, MLX4_COMMAND_INTERFACE_MAX_REV); + err = -ENODEV; + goto out; + } + + if (cmd_if_rev < MLX4_COMMAND_INTERFACE_NEW_PORT_CMDS) + dev->flags |= MLX4_FLAG_OLD_PORT_CMDS; + + MLX4_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET); + cmd->max_cmds = 1 << lg; + + mlx4_dbg(dev, "FW version %d.%d.%03d (cmd intf rev %d), max commands %d\n", + (int) (dev->caps.fw_ver >> 32), + (int) (dev->caps.fw_ver >> 16) & 0xffff, + (int) dev->caps.fw_ver & 0xffff, + cmd_if_rev, cmd->max_cmds); + + MLX4_GET(fw->catas_offset, outbox, QUERY_FW_ERR_START_OFFSET); + MLX4_GET(fw->catas_size, outbox, QUERY_FW_ERR_SIZE_OFFSET); + MLX4_GET(fw->catas_bar, outbox, QUERY_FW_ERR_BAR_OFFSET); + fw->catas_bar = (fw->catas_bar >> 6) * 2; + + mlx4_dbg(dev, "Catastrophic error buffer at 0x%llx, size 0x%x, BAR %d\n", + (unsigned long long) fw->catas_offset, fw->catas_size, fw->catas_bar); + + MLX4_GET(fw->fw_pages, outbox, QUERY_FW_SIZE_OFFSET); + MLX4_GET(fw->clr_int_base, outbox, QUERY_FW_CLR_INT_BASE_OFFSET); + MLX4_GET(fw->clr_int_bar, outbox, QUERY_FW_CLR_INT_BAR_OFFSET); + fw->clr_int_bar = (fw->clr_int_bar >> 6) * 2; + + mlx4_dbg(dev, "FW size %d KB\n", fw->fw_pages >> 2); + + /* + * Round up number of system pages needed in case + * MLX4_ICM_PAGE_SIZE < PAGE_SIZE. + */ + fw->fw_pages = + ALIGN(fw->fw_pages, PAGE_SIZE / MLX4_ICM_PAGE_SIZE) >> + (PAGE_SHIFT - MLX4_ICM_PAGE_SHIFT); + + mlx4_dbg(dev, "Clear int @ %llx, BAR %d\n", + (unsigned long long) fw->clr_int_base, fw->clr_int_bar); + +out: + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + +static void get_board_id(u8 *vsd, char *board_id) +{ + int i; + +#define VSD_OFFSET_SIG1 0x00 +#define VSD_OFFSET_SIG2 0xde +#define VSD_OFFSET_MLX_BOARD_ID 0xd0 +#define VSD_OFFSET_TS_BOARD_ID 0x20 + +#define VSD_SIGNATURE_TOPSPIN 0x5ad + + memset(board_id, 0, MLX4_BOARD_ID_LEN); + + if (be16_to_cpup(vsd + VSD_OFFSET_SIG1) == VSD_SIGNATURE_TOPSPIN && + be16_to_cpup(vsd + VSD_OFFSET_SIG2) == VSD_SIGNATURE_TOPSPIN) { + strlcpy(board_id, vsd + VSD_OFFSET_TS_BOARD_ID, MLX4_BOARD_ID_LEN); + } else { + /* + * The board ID is a string but the firmware byte + * swaps each 4-byte word before passing it back to + * us. Therefore we need to swab it before printing. 
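+ * For example, a board id beginning with the bytes "MT_0" would read back
+ * from the VSD as "0_TM" until each 32-bit word is passed through swab32().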
+ */ + for (i = 0; i < 4; ++i) + ((u32 *) board_id)[i] = + swab32(*(u32 *) (vsd + VSD_OFFSET_MLX_BOARD_ID + i * 4)); + } +} + +int mlx4_QUERY_ADAPTER(struct mlx4_dev *dev, struct mlx4_adapter *adapter) +{ + struct mlx4_cmd_mailbox *mailbox; + u32 *outbox; + int err; + +#define QUERY_ADAPTER_OUT_SIZE 0x100 +#define QUERY_ADAPTER_INTA_PIN_OFFSET 0x10 +#define QUERY_ADAPTER_VSD_OFFSET 0x20 + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; + + err = mlx4_cmd_box(dev, 0, mailbox->dma.da, 0, 0, MLX4_CMD_QUERY_ADAPTER, + MLX4_CMD_TIME_CLASS_A); + if (err) + goto out; + + MLX4_GET(adapter->inta_pin, outbox, QUERY_ADAPTER_INTA_PIN_OFFSET); + + get_board_id((u8*)(outbox + QUERY_ADAPTER_VSD_OFFSET / 4), + adapter->board_id); + +out: + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + +int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) +{ + struct mlx4_cmd_mailbox *mailbox; + __be32 *inbox; + int err; + +#define INIT_HCA_IN_SIZE 0x200 +#define INIT_HCA_VERSION_OFFSET 0x000 +#define INIT_HCA_VERSION 2 +#define INIT_HCA_CACHELINE_SZ_OFFSET 0x0e +#define INIT_HCA_X86_64_BYTE_CACHELINE_SZ 0x40 +#define INIT_HCA_FLAGS_OFFSET 0x014 +#define INIT_HCA_QPC_OFFSET 0x020 +#define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10) +#define INIT_HCA_LOG_QP_OFFSET (INIT_HCA_QPC_OFFSET + 0x17) +#define INIT_HCA_SRQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x28) +#define INIT_HCA_LOG_SRQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x2f) +#define INIT_HCA_CQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x30) +#define INIT_HCA_LOG_CQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x37) +#define INIT_HCA_ALTC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x40) +#define INIT_HCA_AUXC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x50) +#define INIT_HCA_EQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x60) +#define INIT_HCA_LOG_EQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x67) +#define INIT_HCA_RDMARC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x70) +#define INIT_HCA_LOG_RD_OFFSET (INIT_HCA_QPC_OFFSET + 0x77) +#define INIT_HCA_MCAST_OFFSET 0x0c0 +#define INIT_HCA_MC_BASE_OFFSET (INIT_HCA_MCAST_OFFSET + 0x00) +#define INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x12) +#define INIT_HCA_LOG_MC_HASH_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x16) +#define INIT_HCA_LOG_MC_TABLE_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x1b) +#define INIT_HCA_TPT_OFFSET 0x0f0 +#define INIT_HCA_DMPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x00) +#define INIT_HCA_LOG_MPT_SZ_OFFSET (INIT_HCA_TPT_OFFSET + 0x0b) +#define INIT_HCA_MTT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x10) +#define INIT_HCA_CMPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x18) +#define INIT_HCA_UAR_OFFSET 0x120 +#define INIT_HCA_LOG_UAR_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x0a) +#define INIT_HCA_UAR_PAGE_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x0b) + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + inbox = mailbox->buf; + + memset(inbox, 0, INIT_HCA_IN_SIZE); + + *((u8 *) mailbox->buf + INIT_HCA_VERSION_OFFSET) = INIT_HCA_VERSION; +#if defined(_AMD64_) + *((u8 *) mailbox->buf + INIT_HCA_CACHELINE_SZ_OFFSET) = INIT_HCA_X86_64_BYTE_CACHELINE_SZ; +#endif + +#if defined(__LITTLE_ENDIAN) + *(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cpu_to_be32(1 << 1); +#elif defined(__BIG_ENDIAN) + *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 1); +#else +#error Host endianness not defined +#endif + /* Check port for UD address vector: */ + *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1); + + /* Enable QoS support if module parameter set */ + if 
(g.enable_qos) + *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 2); + + /* QPC/EEC/CQC/EQC/RDMARC attributes */ + + MLX4_PUT(inbox, param->qpc_base, INIT_HCA_QPC_BASE_OFFSET); + MLX4_PUT(inbox, param->log_num_qps, INIT_HCA_LOG_QP_OFFSET); + MLX4_PUT(inbox, param->srqc_base, INIT_HCA_SRQC_BASE_OFFSET); + MLX4_PUT(inbox, param->log_num_srqs, INIT_HCA_LOG_SRQ_OFFSET); + MLX4_PUT(inbox, param->cqc_base, INIT_HCA_CQC_BASE_OFFSET); + MLX4_PUT(inbox, param->log_num_cqs, INIT_HCA_LOG_CQ_OFFSET); + MLX4_PUT(inbox, param->altc_base, INIT_HCA_ALTC_BASE_OFFSET); + MLX4_PUT(inbox, param->auxc_base, INIT_HCA_AUXC_BASE_OFFSET); + MLX4_PUT(inbox, param->eqc_base, INIT_HCA_EQC_BASE_OFFSET); + MLX4_PUT(inbox, param->log_num_eqs, INIT_HCA_LOG_EQ_OFFSET); + MLX4_PUT(inbox, param->rdmarc_base, INIT_HCA_RDMARC_BASE_OFFSET); + MLX4_PUT(inbox, param->log_rd_per_qp, INIT_HCA_LOG_RD_OFFSET); + + /* multicast attributes */ + + MLX4_PUT(inbox, param->mc_base, INIT_HCA_MC_BASE_OFFSET); + MLX4_PUT(inbox, param->log_mc_entry_sz, INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET); + MLX4_PUT(inbox, param->log_mc_hash_sz, INIT_HCA_LOG_MC_HASH_SZ_OFFSET); + MLX4_PUT(inbox, param->log_mc_table_sz, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); + + /* TPT attributes */ + + MLX4_PUT(inbox, param->dmpt_base, INIT_HCA_DMPT_BASE_OFFSET); + MLX4_PUT(inbox, param->log_mpt_sz, INIT_HCA_LOG_MPT_SZ_OFFSET); + MLX4_PUT(inbox, param->mtt_base, INIT_HCA_MTT_BASE_OFFSET); + MLX4_PUT(inbox, param->cmpt_base, INIT_HCA_CMPT_BASE_OFFSET); + + /* UAR attributes */ + + MLX4_PUT(inbox, (u8) (PAGE_SHIFT - 12), INIT_HCA_UAR_PAGE_SZ_OFFSET); + MLX4_PUT(inbox, param->log_uar_sz, INIT_HCA_LOG_UAR_SZ_OFFSET); + + err = mlx4_cmd(dev, mailbox->dma.da, 0, 0, MLX4_CMD_INIT_HCA, 10000); + + if (err) + mlx4_err(dev, "INIT_HCA returns %d\n", err); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + +int mlx4_INIT_PORT(struct mlx4_dev *dev, int port) +{ + struct mlx4_cmd_mailbox *mailbox; + u32 *inbox; + int err; + u32 flags; + u16 field; + + if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) { +#define INIT_PORT_IN_SIZE 256 +#define INIT_PORT_FLAGS_OFFSET 0x00 +#define INIT_PORT_FLAG_SIG (1 << 18) +#define INIT_PORT_FLAG_NG (1 << 17) +#define INIT_PORT_FLAG_G0 (1 << 16) +#define INIT_PORT_VL_SHIFT 4 +#define INIT_PORT_PORT_WIDTH_SHIFT 8 +#define INIT_PORT_MTU_OFFSET 0x04 +#define INIT_PORT_MAX_GID_OFFSET 0x06 +#define INIT_PORT_MAX_PKEY_OFFSET 0x0a +#define INIT_PORT_GUID0_OFFSET 0x10 +#define INIT_PORT_NODE_GUID_OFFSET 0x18 +#define INIT_PORT_SI_GUID_OFFSET 0x20 + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + inbox = mailbox->buf; + + memset(inbox, 0, INIT_PORT_IN_SIZE); + + flags = 0; + flags |= (dev->caps.vl_cap[port] & 0xf) << INIT_PORT_VL_SHIFT; + flags |= (dev->caps.port_width_cap[port] & 0xf) << INIT_PORT_PORT_WIDTH_SHIFT; + MLX4_PUT(inbox, flags, INIT_PORT_FLAGS_OFFSET); + + field = (u16)(128 << dev->caps.mtu_cap[port]); + MLX4_PUT(inbox, field, INIT_PORT_MTU_OFFSET); + field = (u16)dev->caps.gid_table_len[port]; + MLX4_PUT(inbox, field, INIT_PORT_MAX_GID_OFFSET); + field = (u16)dev->caps.pkey_table_len[port]; + MLX4_PUT(inbox, field, INIT_PORT_MAX_PKEY_OFFSET); + + err = mlx4_cmd(dev, mailbox->dma.da, port, 0, MLX4_CMD_INIT_PORT, + MLX4_CMD_TIME_CLASS_A); + + mlx4_free_cmd_mailbox(dev, mailbox); + } else + err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT, + MLX4_CMD_TIME_CLASS_A); + + return err; +} +EXPORT_SYMBOL_GPL(mlx4_INIT_PORT); + +int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port) +{ + return mlx4_cmd(dev, 0, 
port, 0, MLX4_CMD_CLOSE_PORT, 1000); +} +EXPORT_SYMBOL_GPL(mlx4_CLOSE_PORT); + +int mlx4_CLOSE_HCA(struct mlx4_dev *dev, int panic) +{ + return mlx4_cmd(dev, 0, 0, (u8)panic, MLX4_CMD_CLOSE_HCA, 1000); +} + +int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages) +{ + int ret = mlx4_cmd_imm(dev, icm_size, aux_pages, 0, 0, + MLX4_CMD_SET_ICM_SIZE, + MLX4_CMD_TIME_CLASS_A); + if (ret) + return ret; + + /* + * Round up number of system pages needed in case + * MLX4_ICM_PAGE_SIZE < PAGE_SIZE. + */ + *aux_pages = ALIGN(*aux_pages, PAGE_SIZE / MLX4_ICM_PAGE_SIZE) >> + (PAGE_SHIFT - MLX4_ICM_PAGE_SHIFT); + + return 0; +} + +int mlx4_NOP(struct mlx4_dev *dev) +{ + /* Input modifier of 0x1f means "finish as soon as possible." */ + return mlx4_cmd(dev, 0, 0x1f, 0, MLX4_CMD_NOP, 100); +} diff --git a/branches/ConnectX/hw/mlx4/kernel/net/fw.h b/branches/ConnectX/hw/mlx4/kernel/net/fw.h new file mode 100644 index 00000000..e16dec89 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/net/fw.h @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2006, 2007 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef MLX4_FW_H +#define MLX4_FW_H + +#include "mlx4.h" +#include "icm.h" + +struct mlx4_dev_cap { + int max_srq_sz; + int max_qp_sz; + int reserved_qps; + int max_qps; + int reserved_srqs; + int max_srqs; + int max_cq_sz; + int reserved_cqs; + int max_cqs; + int max_mpts; + int reserved_eqs; + int max_eqs; + int reserved_mtts; + int max_mrw_sz; + int reserved_mrws; + int max_mtt_seg; + int max_requester_per_qp; + int max_responder_per_qp; + int max_rdma_global; + int local_ca_ack_delay; + int num_ports; + u32 max_msg_sz; + int max_mtu[MLX4_MAX_PORTS + 1]; + int max_port_width[MLX4_MAX_PORTS + 1]; + int max_vl[MLX4_MAX_PORTS + 1]; + int max_gids[MLX4_MAX_PORTS + 1]; + int max_pkeys[MLX4_MAX_PORTS + 1]; + u16 stat_rate_support; + u32 flags; + int reserved_uars; + int uar_size; + int min_page_sz; + int bf_reg_size; + int bf_regs_per_page; + int max_sq_sg; + int max_sq_desc_sz; + int max_rq_sg; + int max_rq_desc_sz; + int max_qp_per_mcg; + int reserved_mgms; + int max_mcgs; + int reserved_pds; + int max_pds; + int qpc_entry_sz; + int rdmarc_entry_sz; + int altc_entry_sz; + int aux_entry_sz; + int srq_entry_sz; + int cqc_entry_sz; + int eqc_entry_sz; + int dmpt_entry_sz; + int cmpt_entry_sz; + int mtt_entry_sz; + int resize_srq; + u8 bmme_flags; + u32 reserved_lkey; + u64 max_icm_sz; +}; + +struct mlx4_adapter { + char board_id[MLX4_BOARD_ID_LEN]; + u8 inta_pin; +}; + +struct mlx4_init_hca_param { + u64 qpc_base; + u64 rdmarc_base; + u64 auxc_base; + u64 altc_base; + u64 srqc_base; + u64 cqc_base; + u64 eqc_base; + u64 mc_base; + u64 dmpt_base; + u64 cmpt_base; + u64 mtt_base; + u16 log_mc_entry_sz; + u16 log_mc_hash_sz; + u8 log_num_qps; + u8 log_num_srqs; + u8 log_num_cqs; + u8 log_num_eqs; + u8 log_rd_per_qp; + u8 log_mc_table_sz; + u8 log_mpt_sz; + u8 log_uar_sz; +}; + +struct mlx4_init_ib_param { + int port_width; + int vl_cap; + int mtu_cap; + u16 gid_cap; + u16 pkey_cap; + int set_guid0; + u64 guid0; + int set_node_guid; + u64 node_guid; + int set_si_guid; + u64 si_guid; +}; + +struct mlx4_set_ib_param { + int set_si_guid; + int reset_qkey_viol; + u64 si_guid; + u32 cap_mask; +}; + +int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap); +int mlx4_MAP_FA(struct mlx4_dev *dev, struct mlx4_icm *icm); +int mlx4_UNMAP_FA(struct mlx4_dev *dev); +int mlx4_RUN_FW(struct mlx4_dev *dev); +int mlx4_QUERY_FW(struct mlx4_dev *dev); +int mlx4_QUERY_ADAPTER(struct mlx4_dev *dev, struct mlx4_adapter *adapter); +int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param); +int mlx4_CLOSE_HCA(struct mlx4_dev *dev, int panic); +int mlx4_map_cmd(struct mlx4_dev *dev, u16 op, struct mlx4_icm *icm, u64 virt); +int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages); +int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm); +int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev); +int mlx4_NOP(struct mlx4_dev *dev); + +#endif /* MLX4_FW_H */ diff --git a/branches/ConnectX/hw/mlx4/kernel/net/icm.c b/branches/ConnectX/hw/mlx4/kernel/net/icm.c new file mode 100644 index 00000000..a87fa1c7 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/net/icm.c @@ -0,0 +1,451 @@ +/* + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "mlx4.h" +#include "cmd.h" +#include "icm.h" +#include "fw.h" + +/* + * We allocate in as big chunks as we can, up to a maximum of 256 KB + * per chunk. + */ +enum { + MLX4_ICM_ALLOC_SIZE = 1 << 18, + MLX4_TABLE_CHUNK_SIZE = 1 << 18 +}; + +static void mlx4_free_icm_pages(struct mlx4_dev *dev, struct mlx4_icm_chunk *chunk) +{ + int i; + + if (chunk->nsg > 0) + pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages, + PCI_DMA_BIDIRECTIONAL); + + for (i = 0; i < chunk->npages; ++i) + __free_pages(dev->pdev, sg_page(&chunk->mem[i]), + get_order(chunk->mem[i].dma_addr.sz)); +} + +static void mlx4_free_icm_coherent(struct mlx4_dev *dev, struct mlx4_icm_chunk *chunk) +{ + int i; + + for (i = 0; i < chunk->npages; ++i) + dma_free_coherent(&dev->pdev->dev, chunk->mem[i].dma_addr.sz, + lowmem_page_address(sg_page(&chunk->mem[i])), + sg_dma_addr(&chunk->mem[i])); +} + +void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent) +{ + struct mlx4_icm_chunk *chunk, *tmp; + + if (!icm) + return; + + list_for_each_entry_safe(chunk, tmp, &icm->chunk_list, list, struct mlx4_icm_chunk, struct mlx4_icm_chunk) { + if (coherent) + mlx4_free_icm_coherent(dev, chunk); + else + mlx4_free_icm_pages(dev, chunk); + + kfree(chunk); + } + + kfree(icm); +} + +static int mlx4_alloc_icm_pages(struct pci_dev *pdev, + struct scatterlist *mem, int order, gfp_t gfp_mask) +{ + dma_addr_t page; + + page = alloc_pages(pdev, gfp_mask, order); + if (!page.da) + return -ENOMEM; + + sg_set_page(mem, page, PAGE_SIZE << order, 0); + return 0; +} + +static int mlx4_alloc_icm_coherent(struct mlx4_dev **dev, struct scatterlist *mem, + int order, gfp_t gfp_mask) +{ + void *buf = dma_alloc_coherent(dev, PAGE_SIZE << order, + &sg_dma_addr(mem), gfp_mask); + if (!buf) + return -ENOMEM; + + sg_set_buf(mem, buf, PAGE_SIZE << order); + BUG_ON(mem->offset); + sg_dma_len(mem) = PAGE_SIZE << order; + return 0; +} + +struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, + gfp_t gfp_mask, int coherent) +{ + struct mlx4_icm *icm; + struct mlx4_icm_chunk *chunk = NULL; + int cur_order; + int ret; + + /* We use sg_set_buf for coherent allocs, which assumes low memory */ + BUG_ON(coherent && (gfp_mask & __GFP_HIGHMEM)); + + icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | 
__GFP_NOWARN)); + if (!icm) + return NULL; + + icm->refcount = 0; + INIT_LIST_HEAD(&icm->chunk_list); + + cur_order = get_order(MLX4_ICM_ALLOC_SIZE); + + while (npages > 0) { + if (!chunk) { + chunk = kmalloc(sizeof *chunk, + gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN)); + if (!chunk) + goto fail; + + sg_init_table(chunk->mem, MLX4_ICM_CHUNK_LEN); + chunk->npages = 0; + chunk->nsg = 0; + list_add_tail(&chunk->list, &icm->chunk_list); + } + + while (1 << cur_order > npages) + --cur_order; + + if (coherent) + ret = mlx4_alloc_icm_coherent(&dev->pdev->dev, + &chunk->mem[chunk->npages], + cur_order, gfp_mask); + else + ret = mlx4_alloc_icm_pages(dev->pdev, &chunk->mem[chunk->npages], + cur_order, gfp_mask); + + if (!ret) { + ++chunk->npages; + + if (coherent) + ++chunk->nsg; + else if (chunk->npages == MLX4_ICM_CHUNK_LEN) { + chunk->nsg = pci_map_sg(dev->pdev, chunk->mem, + chunk->npages, + PCI_DMA_BIDIRECTIONAL); + + if (chunk->nsg <= 0) + goto fail; + + chunk = NULL; + } + + npages -= 1 << cur_order; + } else { + --cur_order; + if (cur_order < 0) + goto fail; + } + } + + if (!coherent && chunk) { + chunk->nsg = pci_map_sg(dev->pdev, chunk->mem, + chunk->npages, + PCI_DMA_BIDIRECTIONAL); + + if (chunk->nsg <= 0) + goto fail; + } + + return icm; + +fail: + mlx4_free_icm(dev, icm, coherent); + return NULL; +} + +static int mlx4_MAP_ICM(struct mlx4_dev *dev, struct mlx4_icm *icm, u64 virt) +{ + return mlx4_map_cmd(dev, MLX4_CMD_MAP_ICM, icm, virt); +} + +int mlx4_UNMAP_ICM(struct mlx4_dev *dev, u64 virt, u32 page_count) +{ + return mlx4_cmd(dev, virt, page_count, 0, MLX4_CMD_UNMAP_ICM, + MLX4_CMD_TIME_CLASS_B); +} + +int mlx4_MAP_ICM_page(struct mlx4_dev *dev, u64 dma_addr, u64 virt) +{ + struct mlx4_cmd_mailbox *mailbox; + __be64 *inbox; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + inbox = mailbox->buf; + + inbox[0] = cpu_to_be64(virt); + inbox[1] = cpu_to_be64(dma_addr); + + err = mlx4_cmd(dev, mailbox->dma.da, 1, 0, MLX4_CMD_MAP_ICM, + MLX4_CMD_TIME_CLASS_B); + + mlx4_free_cmd_mailbox(dev, mailbox); + + if (!err) + mlx4_dbg(dev, "Mapped page at %llx to %llx for ICM.\n", + (unsigned long long) dma_addr, (unsigned long long) virt); + + return err; +} + +int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm) +{ + return mlx4_map_cmd(dev, MLX4_CMD_MAP_ICM_AUX, icm, (u64)-1); +} + +int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev) +{ + return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_UNMAP_ICM_AUX, MLX4_CMD_TIME_CLASS_B); +} + +int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj) +{ + int i = (obj & (table->num_obj - 1)) / (MLX4_TABLE_CHUNK_SIZE / table->obj_size); + int ret = 0; + + mutex_lock(&table->mutex); + + if (table->icm[i]) { + ++table->icm[i]->refcount; + goto out; + } + + table->icm[i] = mlx4_alloc_icm(dev, MLX4_TABLE_CHUNK_SIZE >> PAGE_SHIFT, + (table->lowmem ? 
GFP_KERNEL : GFP_HIGHUSER) | + __GFP_NOWARN, table->coherent); + if (!table->icm[i]) { + ret = -ENOMEM; + goto out; + } + + if (mlx4_MAP_ICM(dev, table->icm[i], table->virt + + (u64) i * MLX4_TABLE_CHUNK_SIZE)) { + mlx4_free_icm(dev, table->icm[i], table->coherent); + table->icm[i] = NULL; + ret = -ENOMEM; + goto out; + } + + ++table->icm[i]->refcount; + +out: + mutex_unlock(&table->mutex); + return ret; +} + +void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj) +{ + int i; + + i = (obj & (table->num_obj - 1)) / (MLX4_TABLE_CHUNK_SIZE / table->obj_size); + + mutex_lock(&table->mutex); + + if (--table->icm[i]->refcount == 0) { + mlx4_UNMAP_ICM(dev, table->virt + i * MLX4_TABLE_CHUNK_SIZE, + MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE); + mlx4_free_icm(dev, table->icm[i], table->coherent); + table->icm[i] = NULL; + } + + mutex_unlock(&table->mutex); +} + +void *mlx4_table_find(struct mlx4_icm_table *table, int obj, dma_addr_t *dma_handle) +{ + int idx, offset, dma_offset, i; + struct mlx4_icm_chunk *chunk; + struct mlx4_icm *icm; + dma_addr_t page = { 0 } ; + + if (!table->lowmem) + return NULL; + + mutex_lock(&table->mutex); + + idx = (obj & (table->num_obj - 1)) * table->obj_size; + icm = table->icm[idx / MLX4_TABLE_CHUNK_SIZE]; + dma_offset = offset = idx % MLX4_TABLE_CHUNK_SIZE; + + if (!icm) + goto out; + + list_for_each_entry(chunk, &icm->chunk_list, list, struct mlx4_icm_chunk) { + for (i = 0; i < chunk->npages; ++i) { + if (dma_handle && dma_offset >= 0) { + if (sg_dma_len(&chunk->mem[i]) > (unsigned)dma_offset) + { + *dma_handle = sg_dma_addr(&chunk->mem[i]); + sg_dma_address_inc(dma_handle,dma_offset); + } + dma_offset -= sg_dma_len(&chunk->mem[i]); + } + /* + * DMA mapping can merge pages but not split them, + * so if we found the page, dma_handle has already + * been assigned to. + */ + if (chunk->mem[i].dma_addr.sz > (unsigned)offset) { + page = sg_page(&chunk->mem[i]); + goto out; + } + offset -= chunk->mem[i].dma_addr.sz; + } + } + +out: + mutex_unlock(&table->mutex); + return page.da ? 
(u8*)lowmem_page_address(page) + offset : NULL; +} + +int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, + int start, int end) +{ + int inc = MLX4_TABLE_CHUNK_SIZE / table->obj_size; + int i, err; + + for (i = start; i <= end; i += inc) { + err = mlx4_table_get(dev, table, i); + if (err) + goto fail; + } + + return 0; + +fail: + while (i > start) { + i -= inc; + mlx4_table_put(dev, table, i); + } + + return err; +} + +void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, + int start, int end) +{ + int i; + + for (i = start; i <= end; i += MLX4_TABLE_CHUNK_SIZE / table->obj_size) + mlx4_table_put(dev, table, i); +} + +int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table, + u64 virt, int obj_size, int nobj, int reserved, + int use_lowmem, int use_coherent) +{ + int obj_per_chunk; + int num_icm; + unsigned chunk_size; + int i; + + obj_per_chunk = MLX4_TABLE_CHUNK_SIZE / obj_size; + num_icm = (nobj + obj_per_chunk - 1) / obj_per_chunk; + + table->icm = kcalloc(num_icm, sizeof *table->icm, GFP_KERNEL); + if (!table->icm) + return -ENOMEM; + table->virt = virt; + table->num_icm = num_icm; + table->num_obj = nobj; + table->obj_size = obj_size; + table->lowmem = use_lowmem; + table->coherent = use_coherent; + mutex_init(&table->mutex); + + for (i = 0; i * MLX4_TABLE_CHUNK_SIZE < reserved * obj_size; ++i) { + chunk_size = MLX4_TABLE_CHUNK_SIZE; + if ((i + 1) * MLX4_TABLE_CHUNK_SIZE > nobj * obj_size) + chunk_size = (unsigned)NEXT_PAGE_ALIGN(nobj * obj_size - i * MLX4_TABLE_CHUNK_SIZE); + + table->icm[i] = mlx4_alloc_icm(dev, chunk_size >> PAGE_SHIFT, + (use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) | + __GFP_NOWARN, use_coherent); + if (!table->icm[i]) + goto err; + if (mlx4_MAP_ICM(dev, table->icm[i], virt + i * MLX4_TABLE_CHUNK_SIZE)) { + mlx4_free_icm(dev, table->icm[i], use_coherent); + table->icm[i] = NULL; + goto err; + } + + /* + * Add a reference to this ICM chunk so that it never + * gets freed (since it contains reserved firmware objects). + */ + ++table->icm[i]->refcount; + } + + return 0; + +err: + for (i = 0; i < num_icm; ++i) + if (table->icm[i]) { + mlx4_UNMAP_ICM(dev, virt + i * MLX4_TABLE_CHUNK_SIZE, + MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE); + mlx4_free_icm(dev, table->icm[i], use_coherent); + } + + return -ENOMEM; +} + +void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table) +{ + int i; + + for (i = 0; i < table->num_icm; ++i) + if (table->icm[i]) { + mlx4_UNMAP_ICM(dev, table->virt + i * MLX4_TABLE_CHUNK_SIZE, + MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE); + mlx4_free_icm(dev, table->icm[i], table->coherent); + } + + kfree(table->icm); +} diff --git a/branches/ConnectX/hw/mlx4/kernel/net/icm.h b/branches/ConnectX/hw/mlx4/kernel/net/icm.h new file mode 100644 index 00000000..dc15df96 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/net/icm.h @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX4_ICM_H +#define MLX4_ICM_H + +#define MLX4_ICM_CHUNK_LEN \ + ((256 - sizeof (struct list_head) - 2 * sizeof (int)) / \ + (sizeof (struct scatterlist))) + +enum { + MLX4_ICM_PAGE_SHIFT = 12, + MLX4_ICM_PAGE_SIZE = 1 << MLX4_ICM_PAGE_SHIFT, +}; + +struct mlx4_icm_chunk { + struct list_head list; + int npages; + int nsg; + struct scatterlist mem[MLX4_ICM_CHUNK_LEN]; +}; + +struct mlx4_icm { + struct list_head chunk_list; + int refcount; +}; + +struct mlx4_icm_iter { + struct mlx4_icm *icm; + struct mlx4_icm_chunk *chunk; + int page_idx; +}; + +struct mlx4_dev; + +struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, + gfp_t gfp_mask, int coherent); +void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent); + +int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj); +void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj); +int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, + int start, int end); +void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, + int start, int end); +int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table, + u64 virt, int obj_size, int nobj, int reserved, + int use_lowmem, int use_coherent); +void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table); +int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj); +void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj); +void *mlx4_table_find(struct mlx4_icm_table *table, int obj, dma_addr_t *dma_handle); +int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, + int start, int end); +void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, + int start, int end); + +static inline void mlx4_icm_first(struct mlx4_icm *icm, + struct mlx4_icm_iter *iter) +{ + iter->icm = icm; + iter->chunk = list_empty(&icm->chunk_list) ? 
+ NULL : list_entry(icm->chunk_list.next, + struct mlx4_icm_chunk, list); + iter->page_idx = 0; +} + +static inline int mlx4_icm_last(struct mlx4_icm_iter *iter) +{ + return !iter->chunk; +} + +static inline void mlx4_icm_next(struct mlx4_icm_iter *iter) +{ + if (++iter->page_idx >= iter->chunk->nsg) { + if (iter->chunk->list.next == &iter->icm->chunk_list) { + iter->chunk = NULL; + return; + } + + iter->chunk = list_entry(iter->chunk->list.next, + struct mlx4_icm_chunk, list); + iter->page_idx = 0; + } +} + +static inline dma_addr_t mlx4_icm_addr(struct mlx4_icm_iter *iter) +{ + return sg_dma_addr(&iter->chunk->mem[iter->page_idx]); +} + +static inline unsigned long mlx4_icm_size(struct mlx4_icm_iter *iter) +{ + return sg_dma_len(&iter->chunk->mem[iter->page_idx]); +} + +int mlx4_UNMAP_ICM(struct mlx4_dev *dev, u64 virt, u32 page_count); +int mlx4_MAP_ICM_page(struct mlx4_dev *dev, u64 dma_addr, u64 virt); +int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm); +int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev); + +#endif /* MLX4_ICM_H */ diff --git a/branches/ConnectX/hw/mlx4/kernel/net/intf.c b/branches/ConnectX/hw/mlx4/kernel/net/intf.c new file mode 100644 index 00000000..249745ba --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/net/intf.c @@ -0,0 +1,170 @@ +/* + * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "mlx4.h" +#include "driver.h" + +struct mlx4_device_context { + struct list_head list; + struct mlx4_interface *intf; + void *context; +}; + +static LIST_HEAD(intf_list); +static LIST_HEAD(dev_list); +static DEFINE_MUTEX(intf_mutex); + +static void mlx4_add_device(struct mlx4_interface *intf, struct mlx4_priv *priv) +{ + struct mlx4_device_context *dev_ctx; + + dev_ctx = kmalloc(sizeof *dev_ctx, GFP_KERNEL); + if (!dev_ctx) + return; + + dev_ctx->intf = intf; + dev_ctx->context = intf->add(&priv->dev); + priv->dev.pdev->ib_dev = dev_ctx->context; + + if (dev_ctx->context) { + spin_lock_irq(&priv->ctx_lock); + list_add_tail(&dev_ctx->list, &priv->ctx_list); + spin_unlock_irq(&priv->ctx_lock); + } else + kfree(dev_ctx); +} + +static void mlx4_remove_device(struct mlx4_interface *intf, struct mlx4_priv *priv) +{ + struct mlx4_device_context *dev_ctx; + + list_for_each_entry(dev_ctx, &priv->ctx_list, list, struct mlx4_device_context) + if (dev_ctx->intf == intf) { + spin_lock_irq(&priv->ctx_lock); + list_del(&dev_ctx->list); + spin_unlock_irq(&priv->ctx_lock); + + intf->remove(&priv->dev, dev_ctx->context); + kfree(dev_ctx); + return; + } +} + +int mlx4_register_interface(struct mlx4_interface *intf) +{ + struct mlx4_priv *priv; + + if (!intf->add || !intf->remove) + return -EINVAL; + + mutex_lock(&intf_mutex); + + list_add_tail(&intf->list, &intf_list); + list_for_each_entry(priv, &dev_list, dev_list, struct mlx4_priv) + mlx4_add_device(intf, priv); + + mutex_unlock(&intf_mutex); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_register_interface); + +void mlx4_unregister_interface(struct mlx4_interface *intf) +{ + struct mlx4_priv *priv; + + mutex_lock(&intf_mutex); + + list_for_each_entry(priv, &dev_list, dev_list, struct mlx4_priv) + mlx4_remove_device(intf, priv); + + list_del(&intf->list); + + mutex_unlock(&intf_mutex); +} +EXPORT_SYMBOL_GPL(mlx4_unregister_interface); + +void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_event type, + int subtype, int port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_device_context *dev_ctx; + + spin_lock_dpc(&priv->ctx_lock); + + list_for_each_entry(dev_ctx, &priv->ctx_list, list, struct mlx4_device_context) + if (dev_ctx->intf->event) + dev_ctx->intf->event(dev, dev_ctx->context, type, + subtype, port); + + spin_unlock_dpc(&priv->ctx_lock); +} + +int mlx4_register_device(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_interface *intf; + + mutex_lock(&intf_mutex); + + list_add_tail(&priv->dev_list, &dev_list); + list_for_each_entry(intf, &intf_list, list, struct mlx4_interface) + mlx4_add_device(intf, priv); + + mutex_unlock(&intf_mutex); + if (!mlx4_is_livefish(dev)) + mlx4_start_catas_poll(dev); + + return 0; +} + +void mlx4_unregister_device(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_interface *intf; + + if (!mlx4_is_livefish(dev)) + mlx4_stop_catas_poll(dev); + mutex_lock(&intf_mutex); + + list_for_each_entry(intf, &intf_list, list, struct mlx4_interface) + mlx4_remove_device(intf, priv); + + list_del(&priv->dev_list); + + mutex_unlock(&intf_mutex); +} + +void mlx4_intf_init() +{ + mutex_init(&intf_mutex); +} diff --git a/branches/ConnectX/hw/mlx4/kernel/net/main.c b/branches/ConnectX/hw/mlx4/kernel/net/main.c new file mode 100644 index 00000000..d99c7270 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/net/main.c @@ -0,0 +1,959 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. 
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#include "mlx4.h" +#include "fw.h" +#include "icm.h" +#include "device.h" +#include "doorbell.h" +#include "complib\cl_thread.h" + +// TODO: put into Globals +#ifdef CONFIG_MLX4_DEBUG +// "Enable debug tracing if > 0" +int debug_level = 1; +#endif /* CONFIG_MLX4_DEBUG */ + +#ifdef CONFIG_PCI_MSI + +// "attempt to use MSI-X if nonzero" +static int msi_x = 1; + +#else /* CONFIG_PCI_MSI */ + +#define msi_x (0) + +#endif /* CONFIG_PCI_MSI */ + +static struct mlx4_profile default_profile = { + 1 << 17, /* num_qp */ + 1 << 4, /* rdmarc_per_qp */ + 1 << 16, /* num_srq */ + 1 << 16, /* num_cq */ + 1 << 13, /* num_mcg */ + 1 << 17, /* num_mpt */ + 1 << 20 /* num_mtt */ +}; + +static void process_mod_param_profile(void) +{ + if (g.mod_num_qp) + default_profile.num_qp = 1 << g.mod_num_qp; + + if (g.mod_rdmarc_per_qp) + default_profile.rdmarc_per_qp = 1 << g.mod_rdmarc_per_qp; + + if (g.mod_num_srq) + default_profile.num_srq = 1 << g.mod_num_srq; + + if (g.mod_num_cq) + default_profile.num_cq = 1 << g.mod_num_cq; + + if (g.mod_num_mcg) + default_profile.num_mcg = 1 << g.mod_num_mcg; + + if (g.mod_num_mpt) + default_profile.num_mpt = 1 << g.mod_num_mpt; + + if (g.mod_num_mtt) + default_profile.num_mtt = 1 << g.mod_num_mtt; +} + +static struct pci_device_id +mlx4_pci_table[] = { + HCA(MELLANOX, SDR, HERMON), + HCA(MELLANOX, DDR, HERMON), + HCA(MELLANOX, QDR, HERMON), + HCA(MELLANOX, DDR_G2, HERMON), + HCA(MELLANOX, QDR_G2, HERMON), + HCA(MELLANOX, BD, LIVEFISH), +}; +#define MLX4_PCI_TABLE_SIZE (sizeof(mlx4_pci_table)/sizeof(struct pci_device_id)) + +static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) +{ + int err; + int i; + + err = mlx4_QUERY_DEV_CAP(dev, dev_cap); + if (err) { + mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); + return err; + } + + if (dev_cap->min_page_sz > PAGE_SIZE) { + mlx4_err(dev, "HCA minimum page size of %d bigger than " + "kernel PAGE_SIZE of %ld, aborting.\n", + dev_cap->min_page_sz, PAGE_SIZE); + return -ENODEV; + } + if 
(dev_cap->num_ports > MLX4_MAX_PORTS) { + mlx4_err(dev, "HCA has %d ports, but we only support %d, " + "aborting.\n", + dev_cap->num_ports, MLX4_MAX_PORTS); + return -ENODEV; + } + + if (dev_cap->uar_size > (int)pci_resource_len(dev->pdev, 2)) { + mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than " + "PCI resource 2 size of 0x%llx, aborting.\n", + dev_cap->uar_size, + (unsigned long long) pci_resource_len(dev->pdev, 2)); + return -ENODEV; + } + + dev->caps.num_ports = dev_cap->num_ports; + for (i = 1; i <= dev->caps.num_ports; ++i) { + dev->caps.vl_cap[i] = dev_cap->max_vl[i]; + dev->caps.mtu_cap[i] = dev_cap->max_mtu[i]; + dev->caps.gid_table_len[i] = dev_cap->max_gids[i]; + dev->caps.pkey_table_len[i] = dev_cap->max_pkeys[i]; + dev->caps.port_width_cap[i] = (u8)dev_cap->max_port_width[i]; + } + + dev->caps.num_uars = dev_cap->uar_size / PAGE_SIZE; + dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay; + dev->caps.bf_reg_size = dev_cap->bf_reg_size; + dev->caps.bf_regs_per_page = dev_cap->bf_regs_per_page; + dev->caps.max_sq_sg = dev_cap->max_sq_sg; + dev->caps.max_rq_sg = dev_cap->max_rq_sg; + dev->caps.max_wqes = dev_cap->max_qp_sz; + dev->caps.max_qp_init_rdma = dev_cap->max_requester_per_qp; + dev->caps.reserved_qps = dev_cap->reserved_qps; + dev->caps.max_srq_wqes = dev_cap->max_srq_sz; + dev->caps.max_srq_sge = dev_cap->max_rq_sg - 1; + dev->caps.reserved_srqs = dev_cap->reserved_srqs; + dev->caps.max_sq_desc_sz = dev_cap->max_sq_desc_sz; + dev->caps.max_rq_desc_sz = dev_cap->max_rq_desc_sz; + dev->caps.num_qp_per_mgm = MLX4_QP_PER_MGM; + /* + * Subtract 1 from the limit because we need to allocate a + * spare CQE so the HCA HW can tell the difference between an + * empty CQ and a full CQ. + */ + dev->caps.max_cqes = dev_cap->max_cq_sz - 1; + dev->caps.reserved_cqs = dev_cap->reserved_cqs; + dev->caps.reserved_eqs = dev_cap->reserved_eqs; + dev->caps.reserved_mtts = DIV_ROUND_UP(dev_cap->reserved_mtts, + MLX4_MTT_ENTRY_PER_SEG); + dev->caps.reserved_mrws = dev_cap->reserved_mrws; + dev->caps.reserved_uars = dev_cap->reserved_uars; + dev->caps.reserved_pds = dev_cap->reserved_pds; + dev->caps.mtt_entry_sz = MLX4_MTT_ENTRY_PER_SEG * dev_cap->mtt_entry_sz; + dev->caps.max_msg_sz = dev_cap->max_msg_sz; + dev->caps.page_size_cap = ~(u32) (dev_cap->min_page_sz - 1); + dev->caps.flags = dev_cap->flags; + dev->caps.stat_rate_support = dev_cap->stat_rate_support; + + return 0; +} + +static int __devinit mlx4_load_fw(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int err; + + priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages, + GFP_HIGHUSER | __GFP_NOWARN, 0); + if (!priv->fw.fw_icm) { + mlx4_err(dev, "Couldn't allocate FW area, aborting.\n"); + return -ENOMEM; + } + + err = mlx4_MAP_FA(dev, priv->fw.fw_icm); + if (err) { + mlx4_err(dev, "MAP_FA command failed, aborting.\n"); + goto err_free; + } + + err = mlx4_RUN_FW(dev); + if (err) { + mlx4_err(dev, "RUN_FW command failed, aborting.\n"); + goto err_unmap_fa; + } + + return 0; + +err_unmap_fa: + mlx4_UNMAP_FA(dev); + +err_free: + mlx4_free_icm(dev, priv->fw.fw_icm, 0); + return err; +} + +static int __devinit mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base, + int cmpt_entry_sz) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int err; + + err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table, + cmpt_base + + ((u64) (MLX4_CMPT_TYPE_QP * + cmpt_entry_sz) << MLX4_CMPT_SHIFT), + cmpt_entry_sz, dev->caps.num_qps, + dev->caps.reserved_qps, 0, 0); + if (err) + goto err; + + err = 
mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table, + cmpt_base + + ((u64) (MLX4_CMPT_TYPE_SRQ * + cmpt_entry_sz) << MLX4_CMPT_SHIFT), + cmpt_entry_sz, dev->caps.num_srqs, + dev->caps.reserved_srqs, 0, 0); + if (err) + goto err_qp; + + err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table, + cmpt_base + + ((u64) (MLX4_CMPT_TYPE_CQ * + cmpt_entry_sz) << MLX4_CMPT_SHIFT), + cmpt_entry_sz, dev->caps.num_cqs, + dev->caps.reserved_cqs, 0, 0); + if (err) + goto err_srq; + + err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table, + cmpt_base + + ((u64) (MLX4_CMPT_TYPE_EQ * + cmpt_entry_sz) << MLX4_CMPT_SHIFT), + cmpt_entry_sz, + roundup_pow_of_two(MLX4_NUM_EQ + + dev->caps.reserved_eqs), + MLX4_NUM_EQ + dev->caps.reserved_eqs, 0, 0); + if (err) + goto err_cq; + + return 0; + +err_cq: + mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); + +err_srq: + mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); + +err_qp: + mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); + +err: + return err; +} + +static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, + struct mlx4_init_hca_param *init_hca, u64 icm_size) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + u64 aux_pages; + int err; + + err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages); + if (err) { + mlx4_err(dev, "SET_ICM_SIZE command failed, aborting.\n"); + return err; + } + + mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory.\n", + (unsigned long long) icm_size >> 10, + (unsigned long long) aux_pages << 2); + + priv->fw.aux_icm = mlx4_alloc_icm(dev, (int)aux_pages, + GFP_HIGHUSER | __GFP_NOWARN, 0); + if (!priv->fw.aux_icm) { + mlx4_err(dev, "Couldn't allocate aux memory, aborting.\n"); + return -ENOMEM; + } + + err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm); + if (err) { + mlx4_err(dev, "MAP_ICM_AUX command failed, aborting.\n"); + goto err_free_aux; + } + + err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz); + if (err) { + mlx4_err(dev, "Failed to map cMPT context memory, aborting.\n"); + goto err_unmap_aux; + } + + err = mlx4_map_eq_icm(dev, init_hca->eqc_base); + if (err) { + mlx4_err(dev, "Failed to map EQ context memory, aborting.\n"); + goto err_unmap_cmpt; + } + + /* + * Reserved MTT entries must be aligned up to a cacheline + * boundary, since the FW will write to them, while the driver + * writes to all other MTT entries. 
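+ * The cache-line size comes from dma_get_cache_alignment(); the check
+ * just below only logs a debug note if the bus driver reports a
+ * different AlignmentRequirement.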
(The variable + * dev->caps.mtt_entry_sz below is really the MTT segment + * size, not the raw entry size) + */ + dev->caps.reserved_mtts = + ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz, + dma_get_cache_alignment()) / dev->caps.mtt_entry_sz; + if ( dev->pdev->p_self_do->AlignmentRequirement + 1 != dma_get_cache_alignment()) { + mlx4_dbg(dev, "Cache-line size %d, recommended value %d.\n", + dev->pdev->p_self_do->AlignmentRequirement + 1, + dma_get_cache_alignment() ); + } + + err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table, + init_hca->mtt_base, + dev->caps.mtt_entry_sz, + dev->caps.num_mtt_segs, + dev->caps.reserved_mtts, 1, 0); + if (err) { + mlx4_err(dev, "Failed to map MTT context memory, aborting.\n"); + goto err_unmap_eq; + } + + err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table, + init_hca->dmpt_base, + dev_cap->dmpt_entry_sz, + dev->caps.num_mpts, + dev->caps.reserved_mrws, 1, 1); + if (err) { + mlx4_err(dev, "Failed to map dMPT context memory, aborting.\n"); + goto err_unmap_mtt; + } + + err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table, + init_hca->qpc_base, + dev_cap->qpc_entry_sz, + dev->caps.num_qps, + dev->caps.reserved_qps, 0, 0); + if (err) { + mlx4_err(dev, "Failed to map QP context memory, aborting.\n"); + goto err_unmap_dmpt; + } + + err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table, + init_hca->auxc_base, + dev_cap->aux_entry_sz, + dev->caps.num_qps, + dev->caps.reserved_qps, 0, 0); + if (err) { + mlx4_err(dev, "Failed to map AUXC context memory, aborting.\n"); + goto err_unmap_qp; + } + + err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table, + init_hca->altc_base, + dev_cap->altc_entry_sz, + dev->caps.num_qps, + dev->caps.reserved_qps, 0, 0); + if (err) { + mlx4_err(dev, "Failed to map ALTC context memory, aborting.\n"); + goto err_unmap_auxc; + } + + err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table, + init_hca->rdmarc_base, + dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift, + dev->caps.num_qps, + dev->caps.reserved_qps, 0, 0); + if (err) { + mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n"); + goto err_unmap_altc; + } + + err = mlx4_init_icm_table(dev, &priv->cq_table.table, + init_hca->cqc_base, + dev_cap->cqc_entry_sz, + dev->caps.num_cqs, + dev->caps.reserved_cqs, 0, 0); + if (err) { + mlx4_err(dev, "Failed to map CQ context memory, aborting.\n"); + goto err_unmap_rdmarc; + } + + err = mlx4_init_icm_table(dev, &priv->srq_table.table, + init_hca->srqc_base, + dev_cap->srq_entry_sz, + dev->caps.num_srqs, + dev->caps.reserved_srqs, 0, 0); + if (err) { + mlx4_err(dev, "Failed to map SRQ context memory, aborting.\n"); + goto err_unmap_cq; + } + + /* + * It's not strictly required, but for simplicity just map the + * whole multicast group table now. The table isn't very big + * and it's a lot easier than trying to track ref counts. 
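+ * Each MGM entry is MLX4_MGM_ENTRY_SIZE bytes, and the call below
+ * covers num_mgms hash-table entries plus num_amgms overflow (AMGM)
+ * entries in a single ICM table.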
+ */ + err = mlx4_init_icm_table(dev, &priv->mcg_table.table, + init_hca->mc_base, MLX4_MGM_ENTRY_SIZE, + dev->caps.num_mgms + dev->caps.num_amgms, + dev->caps.num_mgms + dev->caps.num_amgms, + 0, 0); + if (err) { + mlx4_err(dev, "Failed to map MCG context memory, aborting.\n"); + goto err_unmap_srq; + } + + return 0; + +err_unmap_srq: + mlx4_cleanup_icm_table(dev, &priv->srq_table.table); + +err_unmap_cq: + mlx4_cleanup_icm_table(dev, &priv->cq_table.table); + +err_unmap_rdmarc: + mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table); + +err_unmap_altc: + mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table); + +err_unmap_auxc: + mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table); + +err_unmap_qp: + mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table); + +err_unmap_dmpt: + mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table); + +err_unmap_mtt: + mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table); + +err_unmap_eq: + mlx4_unmap_eq_icm(dev); + +err_unmap_cmpt: + mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table); + mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); + mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); + mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); + +err_unmap_aux: + mlx4_UNMAP_ICM_AUX(dev); + +err_free_aux: + mlx4_free_icm(dev, priv->fw.aux_icm, 0); + + return err; +} + +static void mlx4_free_icms(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + mlx4_cleanup_icm_table(dev, &priv->mcg_table.table); + mlx4_cleanup_icm_table(dev, &priv->srq_table.table); + mlx4_cleanup_icm_table(dev, &priv->cq_table.table); + mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table); + mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table); + mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table); + mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table); + mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table); + mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table); + mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table); + mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); + mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); + mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); + mlx4_unmap_eq_icm(dev); + + mlx4_UNMAP_ICM_AUX(dev); + mlx4_free_icm(dev, priv->fw.aux_icm, 0); +} + +static void mlx4_close_hca(struct mlx4_dev *dev) +{ + mlx4_CLOSE_HCA(dev, 0); + mlx4_free_icms(dev); + mlx4_UNMAP_FA(dev); + mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0); +} + +static int mlx4_init_hca(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_adapter adapter; + struct mlx4_dev_cap dev_cap; + struct mlx4_profile profile; + struct mlx4_init_hca_param init_hca; + u64 icm_size; + int err; + + err = mlx4_QUERY_FW(dev); + if (err) { + mlx4_err(dev, "QUERY_FW command failed, aborting.\n"); + return err; + } + + err = mlx4_load_fw(dev); + if (err) { + mlx4_err(dev, "Failed to start FW, aborting.\n"); + return err; + } + + err = mlx4_dev_cap(dev, &dev_cap); + if (err) { + mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); + goto err_stop_fw; + } + + process_mod_param_profile(); + profile = default_profile; + + icm_size = mlx4_make_profile(dev, &profile, &dev_cap, &init_hca); + if ((long long) icm_size < 0) { + err = (int)icm_size; + goto err_stop_fw; + } + + init_hca.log_uar_sz = (u8)ilog2(dev->caps.num_uars); + + err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size); + if (err) + goto err_stop_fw; + + err = mlx4_INIT_HCA(dev, &init_hca); + if (err) { + mlx4_err(dev, "INIT_HCA 
command failed, aborting.\n"); + goto err_free_icm; + } + + err = mlx4_QUERY_ADAPTER(dev, &adapter); + if (err) { + mlx4_err(dev, "QUERY_ADAPTER command failed, aborting.\n"); + goto err_close; + } + + priv->eq_table.inta_pin = adapter.inta_pin; + memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id); + + return 0; + +err_close: + mlx4_close_hca(dev); + +err_free_icm: + mlx4_free_icms(dev); + +err_stop_fw: + mlx4_UNMAP_FA(dev); + mlx4_free_icm(dev, priv->fw.fw_icm, 0); + + return err; +} + +static int mlx4_setup_hca(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int err; + + err = mlx4_init_uar_table(dev); + if (err) { + mlx4_err(dev, "Failed to initialize " + "user access region table, aborting.\n"); + return err; + } + + err = mlx4_uar_alloc(dev, &priv->driver_uar); + if (err) { + mlx4_err(dev, "Failed to allocate driver access region, " + "aborting.\n"); + goto err_uar_table_free; + } + + priv->kar = ioremap(priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE); + if (!priv->kar) { + mlx4_err(dev, "Couldn't map kernel access region, " + "aborting.\n"); + err = -ENOMEM; + goto err_uar_free; + } + + err = mlx4_init_pd_table(dev); + if (err) { + mlx4_err(dev, "Failed to initialize " + "protection domain table, aborting.\n"); + goto err_kar_unmap; + } + + err = mlx4_init_mr_table(dev); + if (err) { + mlx4_err(dev, "Failed to initialize " + "memory region table, aborting.\n"); + goto err_pd_table_free; + } + + + err = mlx4_init_eq_table(dev); + if (err) { + mlx4_err(dev, "Failed to initialize " + "event queue table, aborting.\n"); + goto err_mr_table_free; + } + + err = mlx4_cmd_use_events(dev); + if (err) { + mlx4_err(dev, "Failed to switch to event-driven " + "firmware commands, aborting.\n"); + goto err_eq_table_free; + } + + err = mlx4_NOP(dev); + if (err) { + if (dev->flags & MLX4_FLAG_MSI_X) { + mlx4_warn(dev, "NOP command failed to generate MSI-X " + "interrupt IRQ %d).\n", + priv->eq_table.eq[MLX4_EQ_ASYNC].irq); + mlx4_warn(dev, "Trying again without MSI-X.\n"); + } else { + mlx4_err(dev, "NOP command failed to generate interrupt " + "(IRQ %d), aborting.\n", + priv->eq_table.eq[MLX4_EQ_ASYNC].irq); + mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n"); + } + + goto err_cmd_poll; + } + + mlx4_dbg(dev, "NOP command IRQ test passed\n"); + + err = mlx4_init_cq_table(dev); + if (err) { + mlx4_err(dev, "Failed to initialize " + "completion queue table, aborting.\n"); + goto err_cmd_poll; + } + + err = mlx4_init_srq_table(dev); + if (err) { + mlx4_err(dev, "Failed to initialize " + "shared receive queue table, aborting.\n"); + goto err_cq_table_free; + } + + err = mlx4_init_qp_table(dev); + if (err) { + mlx4_err(dev, "Failed to initialize " + "queue pair table, aborting.\n"); + goto err_srq_table_free; + } + + err = mlx4_init_mcg_table(dev); + if (err) { + mlx4_err(dev, "Failed to initialize " + "multicast group table, aborting.\n"); + goto err_qp_table_free; + } + + return 0; + +err_qp_table_free: + mlx4_cleanup_qp_table(dev); + +err_srq_table_free: + mlx4_cleanup_srq_table(dev); + +err_cq_table_free: + mlx4_cleanup_cq_table(dev); + +err_cmd_poll: + mlx4_cmd_use_polling(dev); + +err_eq_table_free: + mlx4_cleanup_eq_table(dev); + +err_mr_table_free: + mlx4_cleanup_mr_table(dev); + +err_pd_table_free: + mlx4_cleanup_pd_table(dev); + +err_kar_unmap: + iounmap(priv->kar,PAGE_SIZE); + +err_uar_free: + mlx4_uar_free(dev, &priv->driver_uar); + +err_uar_table_free: + mlx4_cleanup_uar_table(dev); + return err; +} + +static void __devinit mlx4_enable_msi_x(struct 
mlx4_dev *dev) +{ +#ifdef CONFIG_PCI_MSI + struct mlx4_priv *priv = mlx4_priv(dev); + struct msix_entry entries[MLX4_NUM_EQ]; + int err; + int i; + + if (msi_x) { + for (i = 0; i < MLX4_NUM_EQ; ++i) + entries[i].entry = i; + + err = pci_enable_msix(dev->pdev, entries, ARRAY_SIZE(entries)); + if (err) { + if (err > 0) + mlx4_info(dev, "Only %d MSI-X vectors available, " + "not using MSI-X\n", err); + goto no_msi; + } + + for (i = 0; i < MLX4_NUM_EQ; ++i) + priv->eq_table.eq[i].irq = entries[i].vector; + + dev->flags |= MLX4_FLAG_MSI_X; + return; + } + +no_msi: + for (i = 0; i < MLX4_NUM_EQ; ++i) + priv->eq_table.eq[i].irq = dev->pdev->irq; + +#else + UNUSED_PARAM(dev); +#endif +} + + +static struct pci_device_id * mlx4_find_pci_dev(USHORT ven_id, USHORT dev_id) +{ + struct pci_device_id *p_id = mlx4_pci_table; + int i; + + // find p_id (appropriate line in mlx4_pci_table) + for (i = 0; i < MLX4_PCI_TABLE_SIZE; ++i, ++p_id) { + if (p_id->device == dev_id && p_id->vendor == ven_id) + return p_id; + } + return NULL; +} + +int mlx4_init_one(struct pci_dev *pdev) +{ + struct pci_device_id *id; + struct mlx4_priv *priv; + struct mlx4_dev *dev; + int err; + +#ifdef FORCE_LIVEFISH + if (pdev) + goto err; +#endif + + /* find the type of device */ + id = mlx4_find_pci_dev(pdev->ven_id, pdev->dev_id); + if (id == NULL) { + err = -ENOSYS; + goto err; + } + + /* + * Check for BARs. We expect 0: 1MB, 2: 8MB, 4: DDR (may not + * be present) + */ + if (pci_resource_len(pdev, 0) != 1 << 20) { + MLX4_PRINT(TRACE_LEVEL_INFORMATION ,MLX4_DBG_LOW , + ("Missing DCS, aborting.\n")); + err = -ENODEV; + goto err; + } + if (!pci_resource_len(pdev, 1)) { + MLX4_PRINT(TRACE_LEVEL_INFORMATION ,MLX4_DBG_LOW , + ("Missing UAR, aborting.\n")); + err = -ENODEV; + goto err; + } + +run_as_livefish: + /* allocate mlx4_priv structure */ + priv = kzalloc(sizeof *priv, GFP_KERNEL); + if (!priv) { + MLX4_PRINT(TRACE_LEVEL_INFORMATION ,MLX4_DBG_LOW , + ("Device struct alloc failed, aborting.\n")); + err = -ENOMEM; + goto end; + } + /* must be here for livefish */ + INIT_LIST_HEAD(&priv->ctx_list); + spin_lock_init(&priv->ctx_lock); + + /* deal with livefish, if any */ + dev = &priv->dev; + dev->pdev = pdev; + pdev->dev = dev; + if (id->driver_data == LIVEFISH) + dev->flags |= MLX4_FLAG_LIVEFISH; + if (mlx4_is_livefish(dev)) { + err = mlx4_register_device(dev); + if (err) + MLX4_PRINT(TRACE_LEVEL_INFORMATION ,MLX4_DBG_LOW , + ("mlx4_register_device for livefish failed, trying to proceed.\n")); + goto end; + } + + /* + * Now reset the HCA before we touch the PCI capabilities or + * attempt a firmware command, since a boot ROM may have left + * the HCA in an undefined state. + */ + err = mlx4_reset(dev); + if (err) { + mlx4_err(dev, "Failed to reset HCA, aborting.\n"); + goto err_free_dev; + } + + if (mlx4_cmd_init(dev)) { + mlx4_err(dev, "Failed to init command interface, aborting.\n"); + goto err_free_dev; + } + + err = mlx4_init_hca(dev); + if (err) + goto err_cmd; + + mlx4_enable_msi_x(dev); + + err = mlx4_setup_hca(dev); + if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X)) { +#ifdef CONFIG_PCI_MSI + dev->flags &= ~MLX4_FLAG_MSI_X; + pci_disable_msix(pdev); +#endif + err = mlx4_setup_hca(dev); + } + + if (err) + goto err_close; + + err = mlx4_register_device(dev); + if (err) + goto err_cleanup; + + mlx4_dbg(dev, "MLX4_BUS: NET device (dev_id=%d) is INITIALIZED ! 
\n", (int)pdev->dev_id); + return 0; + +err_cleanup: + mlx4_cleanup_mcg_table(dev); + mlx4_cleanup_qp_table(dev); + mlx4_cleanup_srq_table(dev); + mlx4_cleanup_cq_table(dev); + mlx4_cmd_use_polling(dev); + mlx4_cleanup_eq_table(dev); + mlx4_cleanup_mr_table(dev); + mlx4_cleanup_pd_table(dev); + mlx4_cleanup_uar_table(dev); + +err_close: +#ifdef CONFIG_PCI_MSI + if (dev->flags & MLX4_FLAG_MSI_X) + pci_disable_msix(pdev); +#endif + + mlx4_close_hca(dev); + +err_cmd: + mlx4_cmd_cleanup(dev); + +err_free_dev: + kfree(priv); + +err: + /* we failed device initialization - try to simulate "livefish" device to facilitate using FW burning tools */ + pdev->dev_id = DEVID_HERMON_BD; + id = mlx4_find_pci_dev(pdev->ven_id, pdev->dev_id); + if (id == NULL) { + err = -ENOSYS; + goto end; + } + goto run_as_livefish; + +end: + return err; +} + +void mlx4_remove_one(struct pci_dev *pdev) +{ + struct mlx4_dev *dev = pdev->dev; + struct mlx4_priv *priv = mlx4_priv(dev); + int p; + + if (dev) { + mlx4_unregister_device(dev); + if (mlx4_is_livefish(dev)) + goto done; + + for (p = 1; p <= dev->caps.num_ports; ++p) + mlx4_CLOSE_PORT(dev, p); + + mlx4_cleanup_mcg_table(dev); + mlx4_cleanup_qp_table(dev); + mlx4_cleanup_srq_table(dev); + mlx4_cleanup_cq_table(dev); + mlx4_cmd_use_polling(dev); + mlx4_cleanup_eq_table(dev); + mlx4_cleanup_mr_table(dev); + mlx4_cleanup_pd_table(dev); + + iounmap(priv->kar,PAGE_SIZE); + mlx4_uar_free(dev, &priv->driver_uar); + mlx4_cleanup_uar_table(dev); + mlx4_close_hca(dev); + mlx4_cmd_cleanup(dev); + +#ifdef CONFIG_PCI_MSI + if (dev->flags & MLX4_FLAG_MSI_X) + pci_disable_msix(pdev); +#endif + + mlx4_dbg(dev, "MLX4_BUS: NET device (dev_id=%d) is REMOVED ! \n", (int)pdev->dev_id); + pdev->dev = NULL; +done: + kfree(priv); + } +} + +int mlx4_restart_one(struct pci_dev *pdev) +{ + mlx4_remove_one(pdev); + return mlx4_init_one(pdev); +} + +void mlx4_net_init() +{ + mlx4_intf_init(); +} + diff --git a/branches/ConnectX/hw/mlx4/kernel/net/makefile b/branches/ConnectX/hw/mlx4/kernel/net/makefile new file mode 100644 index 00000000..a0c06273 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/net/makefile @@ -0,0 +1,7 @@ +# +# DO NOT EDIT THIS FILE!!! Edit .\sources. if you want to add a new source +# file to this component. This file merely indirects to the real make file +# that is shared by all the driver components of the OpenIB Windows project. +# + +!INCLUDE ..\..\..\..\inc\openib.def diff --git a/branches/ConnectX/hw/mlx4/kernel/net/mcg.c b/branches/ConnectX/hw/mlx4/kernel/net/mcg.c new file mode 100644 index 00000000..6a020eac --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/net/mcg.c @@ -0,0 +1,378 @@ +/* + * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#include "mlx4.h" +#include "cmd.h" + +struct mlx4_mgm { + __be32 next_gid_index; + __be32 members_count; + u32 reserved[2]; + u8 gid[16]; + __be32 qp[MLX4_QP_PER_MGM]; +}; + +static const u8 zero_gid[16] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + +static int mlx4_READ_MCG(struct mlx4_dev *dev, int index, + struct mlx4_cmd_mailbox *mailbox) +{ + return mlx4_cmd_box(dev, 0, mailbox->dma.da, index, 0, MLX4_CMD_READ_MCG, + MLX4_CMD_TIME_CLASS_A); +} + +static int mlx4_WRITE_MCG(struct mlx4_dev *dev, int index, + struct mlx4_cmd_mailbox *mailbox) +{ + return mlx4_cmd(dev, mailbox->dma.da, index, 0, MLX4_CMD_WRITE_MCG, + MLX4_CMD_TIME_CLASS_A); +} + +static int mlx4_MGID_HASH(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, + u16 *hash) +{ + u64 imm; + int err; + + err = mlx4_cmd_imm(dev, mailbox->dma.da, &imm, 0, 0, MLX4_CMD_MGID_HASH, + MLX4_CMD_TIME_CLASS_A); + + if (!err) + *hash = (u16)imm; + + return err; +} + +/* + * Caller must hold MCG table semaphore. gid and mgm parameters must + * be properly aligned for command interface. + * + * Returns 0 unless a firmware command error occurs. + * + * If GID is found in MGM or MGM is empty, *index = *hash, *prev = -1 + * and *mgm holds MGM entry. + * + * if GID is found in AMGM, *index = index in AMGM, *prev = index of + * previous entry in hash chain and *mgm holds AMGM entry. + * + * If no AMGM exists for given gid, *index = -1, *prev = index of last + * entry in hash chain and *mgm holds end of hash chain. 
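+ * The next_gid_index field holds the AMGM index shifted left by six
+ * bits: the chain is walked with be32_to_cpu(next_gid_index) >> 6 and
+ * extended with cpu_to_be32(index << 6) (see mlx4_multicast_attach).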
+ */ +static int find_mgm(struct mlx4_dev *dev, + u8 *gid, struct mlx4_cmd_mailbox *mgm_mailbox, + u16 *hash, int *prev, int *index) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_mgm *mgm = mgm_mailbox->buf; + u8 *mgid; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return -ENOMEM; + mgid = mailbox->buf; + + memcpy(mgid, gid, 16); + + err = mlx4_MGID_HASH(dev, mailbox, hash); + mlx4_free_cmd_mailbox(dev, mailbox); + if (err) + return err; + +#if 0 + mlx4_dbg(dev, "Hash for %04x:%04x:%04x:%04x:" + "%04x:%04x:%04x:%04x is %04x\n", + be16_to_cpu(((__be16 *) gid)[0]), + be16_to_cpu(((__be16 *) gid)[1]), + be16_to_cpu(((__be16 *) gid)[2]), + be16_to_cpu(((__be16 *) gid)[3]), + be16_to_cpu(((__be16 *) gid)[4]), + be16_to_cpu(((__be16 *) gid)[5]), + be16_to_cpu(((__be16 *) gid)[6]), + be16_to_cpu(((__be16 *) gid)[7]), + *hash); +#endif + + *index = *hash; + *prev = -1; + + do { + err = mlx4_READ_MCG(dev, *index, mgm_mailbox); + if (err) + return err; + + if (!memcmp(mgm->gid, zero_gid, 16)) { + if (*index != *hash) { + mlx4_err(dev, "Found zero MGID in AMGM.\n"); + err = -EINVAL; + } + return err; + } + + if (!memcmp(mgm->gid, gid, 16)) + return err; + + *prev = *index; + *index = be32_to_cpu(mgm->next_gid_index) >> 6; + } while (*index); + + *index = -1; + return err; +} + +int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_mgm *mgm; + u32 members_count; + u16 hash; + int index, prev; + int link = 0; + unsigned i; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + mgm = mailbox->buf; + + mutex_lock(&priv->mcg_table.mutex); + + err = find_mgm(dev, gid, mailbox, &hash, &prev, &index); + if (err) + goto out; + + if (index != -1) { + if (!memcmp(mgm->gid, zero_gid, 16)) + memcpy(mgm->gid, gid, 16); + } else { + link = 1; + + index = mlx4_bitmap_alloc(&priv->mcg_table.bitmap); + if (index == -1) { + mlx4_err(dev, "No AMGM entries left\n"); + err = -ENOMEM; + goto out; + } + index += dev->caps.num_mgms; + + err = mlx4_READ_MCG(dev, index, mailbox); + if (err) + goto out; + + memset(mgm, 0, sizeof *mgm); + memcpy(mgm->gid, gid, 16); + } + + members_count = be32_to_cpu(mgm->members_count); + if (members_count == MLX4_QP_PER_MGM) { + mlx4_err(dev, "MGM at index %x is full.\n", index); + err = -ENOMEM; + goto out; + } + + for (i = 0; i < members_count; ++i) + if (mgm->qp[i] == cpu_to_be32(qp->qpn)) { + mlx4_dbg(dev, "QP %06x already a member of MGM\n", qp->qpn); + err = 0; + goto out; + } + + mgm->qp[members_count++] = cpu_to_be32(qp->qpn); + mgm->members_count = cpu_to_be32(members_count); + + err = mlx4_WRITE_MCG(dev, index, mailbox); + if (err) + goto out; + + if (!link) + goto out; + + err = mlx4_READ_MCG(dev, prev, mailbox); + if (err) + goto out; + + mgm->next_gid_index = cpu_to_be32(index << 6); + + err = mlx4_WRITE_MCG(dev, prev, mailbox); + if (err) + goto out; + +out: + if (err && link && index != -1) { + if (index < dev->caps.num_mgms) + mlx4_warn(dev, "Got AMGM index %d < %d", + index, dev->caps.num_mgms); + else + mlx4_bitmap_free(&priv->mcg_table.bitmap, + index - dev->caps.num_mgms); + } + mutex_unlock(&priv->mcg_table.mutex); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_multicast_attach); + +int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct 
mlx4_cmd_mailbox *mailbox; + struct mlx4_mgm *mgm; + u32 members_count; + u16 hash; + int prev, index; + int i, loc; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + mgm = mailbox->buf; + + mutex_lock(&priv->mcg_table.mutex); + + err = find_mgm(dev, gid, mailbox, &hash, &prev, &index); + if (err) + goto out; + + if (index == -1) { + mlx4_err(dev, "MGID %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x " + "not found\n", + be16_to_cpu(((__be16 *) gid)[0]), + be16_to_cpu(((__be16 *) gid)[1]), + be16_to_cpu(((__be16 *) gid)[2]), + be16_to_cpu(((__be16 *) gid)[3]), + be16_to_cpu(((__be16 *) gid)[4]), + be16_to_cpu(((__be16 *) gid)[5]), + be16_to_cpu(((__be16 *) gid)[6]), + be16_to_cpu(((__be16 *) gid)[7])); + err = -EINVAL; + goto out; + } + + members_count = be32_to_cpu(mgm->members_count); + for (loc = -1, i = 0; i < (int)members_count; ++i) + if (mgm->qp[i] == cpu_to_be32(qp->qpn)) + loc = i; + + if (loc == -1) { + mlx4_err(dev, "QP %06x not found in MGM\n", qp->qpn); + err = -EINVAL; + goto out; + } + + + mgm->members_count = cpu_to_be32(--members_count); + mgm->qp[loc] = mgm->qp[i - 1]; + mgm->qp[i - 1] = 0; + + err = mlx4_WRITE_MCG(dev, index, mailbox); + if (err) + goto out; + + if (i != 1) + goto out; + + if (prev == -1) { + /* Remove entry from MGM */ + int amgm_index = be32_to_cpu(mgm->next_gid_index) >> 6; + if (amgm_index) { + err = mlx4_READ_MCG(dev, amgm_index, mailbox); + if (err) + goto out; + } else + memset(mgm->gid, 0, 16); + + err = mlx4_WRITE_MCG(dev, index, mailbox); + if (err) + goto out; + + if (amgm_index) { + if (amgm_index < dev->caps.num_mgms) + mlx4_warn(dev, "MGM entry %d had AMGM index %d < %d", + index, amgm_index, dev->caps.num_mgms); + else + mlx4_bitmap_free(&priv->mcg_table.bitmap, + amgm_index - dev->caps.num_mgms); + } + } else { + /* Remove entry from AMGM */ + int cur_next_index = be32_to_cpu(mgm->next_gid_index) >> 6; + err = mlx4_READ_MCG(dev, prev, mailbox); + if (err) + goto out; + + mgm->next_gid_index = cpu_to_be32(cur_next_index << 6); + + err = mlx4_WRITE_MCG(dev, prev, mailbox); + if (err) + goto out; + + if (index < dev->caps.num_mgms) + mlx4_warn(dev, "entry %d had next AMGM index %d < %d", + prev, index, dev->caps.num_mgms); + else + mlx4_bitmap_free(&priv->mcg_table.bitmap, + index - dev->caps.num_mgms); + } + +out: + mutex_unlock(&priv->mcg_table.mutex); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_multicast_detach); + +int mlx4_init_mcg_table(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int err; + + err = mlx4_bitmap_init(&priv->mcg_table.bitmap, + dev->caps.num_amgms, dev->caps.num_amgms - 1, 0); + if (err) + return err; + + mutex_init(&priv->mcg_table.mutex); + + return 0; +} + +void mlx4_cleanup_mcg_table(struct mlx4_dev *dev) +{ + mlx4_bitmap_cleanup(&mlx4_priv(dev)->mcg_table.bitmap); +} diff --git a/branches/ConnectX/hw/mlx4/kernel/net/mlx4.h b/branches/ConnectX/hw/mlx4/kernel/net/mlx4.h new file mode 100644 index 00000000..f1a47a39 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/net/mlx4.h @@ -0,0 +1,366 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX4_H +#define MLX4_H + +#include "l2w.h" +#include "device.h" +#include "doorbell.h" + +#define DRV_NAME "mlx4_net" +#define PFX DRV_NAME ": " + + +// +// Structure for reporting data to WMI +// + +typedef struct _BUS_WMI_STD_DATA { + UINT32 DebugPrintLevel; + UINT32 DebugPrintFlags; + +} BUS_WMI_STD_DATA, * PBUS_WMI_STD_DATA; + + +// +// Driver global data +// + +#pragma warning(disable:4201) // nameless struct/union +typedef struct _GLOBALS { + BUS_WMI_STD_DATA; + + int mod_num_qp; + int mod_rdmarc_per_qp; + int mod_num_srq; + int mod_num_cq; + int mod_num_mcg; + int mod_num_mpt; + int mod_num_mtt; + + int enable_qos; +} GLOBALS; +#pragma warning(default:4201) // nameless struct/union + +extern GLOBALS g; + + +enum { + MLX4_HCR_BASE = 0x80680, + MLX4_HCR_SIZE = 0x0001c, + MLX4_CLR_INT_SIZE = 0x00008 +}; + +enum { + MLX4_MGM_ENTRY_SIZE = 0x100, + MLX4_QP_PER_MGM = 4 * (MLX4_MGM_ENTRY_SIZE / 16 - 2), + MLX4_MTT_ENTRY_PER_SEG = 8 +}; + +enum { + MLX4_EQ_ASYNC, + MLX4_EQ_COMP, + MLX4_NUM_EQ +}; + +enum { + MLX4_NUM_PDS = 1 << 15 +}; + +enum { + MLX4_CMPT_TYPE_QP = 0, + MLX4_CMPT_TYPE_SRQ = 1, + MLX4_CMPT_TYPE_CQ = 2, + MLX4_CMPT_TYPE_EQ = 3, + MLX4_CMPT_NUM_TYPE +}; + +enum { + MLX4_CMPT_SHIFT = 24, + MLX4_NUM_CMPTS = MLX4_CMPT_NUM_TYPE << MLX4_CMPT_SHIFT +}; + +struct mlx4_bitmap { + u32 last; + u32 top; + u32 max; + u32 mask; + spinlock_t lock; + unsigned long *table; +}; + +struct mlx4_buddy { + unsigned long **bits; + int max_order; + spinlock_t lock; +}; + +struct mlx4_icm; + +struct mlx4_icm_table { + u64 virt; + int num_icm; + int num_obj; + int obj_size; + int lowmem; + int coherent; + struct mutex mutex; + struct mlx4_icm **icm; +}; + +struct mlx4_eq { + struct mlx4_dev *dev; + void __iomem *doorbell; + int eqn; + u32 cons_index; + u16 irq; + u16 have_irq; + int nent; + struct mlx4_buf_list *page_list; + struct mlx4_mtt mtt; + // Windows + KDPC dpc; /* DPC routine */ + spinlock_t lock; /* spinlock for simult DPCs */ + int eq_ix; /* EQ index - 0..MLX4_NUM_EQ */ + BOOLEAN (*isr)(int,void*); /* isr */ + void * ctx; /* isr ctx */ +}; + +struct mlx4_profile { + int num_qp; + int rdmarc_per_qp; + int num_srq; + int num_cq; + int num_mcg; + int num_mpt; + int num_mtt; 
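+	/* Requested resource sizes for the HCA profile; mlx4_make_profile()
+	 * rounds the derived object counts up to powers of two when laying
+	 * out the ICM context memory. */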
+}; + +struct mlx4_fw { + u64 clr_int_base; + u64 catas_offset; + struct mlx4_icm *fw_icm; + struct mlx4_icm *aux_icm; + u32 catas_size; + u16 fw_pages; + u8 clr_int_bar; + u8 catas_bar; +}; + +struct mlx4_cmd { + struct pci_pool *pool; + u8 __iomem *hcr; + struct mutex hcr_mutex; + struct semaphore poll_sem; + struct semaphore event_sem; + int max_cmds; + spinlock_t context_lock; + int free_head; + struct mlx4_cmd_context *context; + u16 token_mask; + u8 use_events; + u8 toggle; +}; + +struct mlx4_uar_table { + struct mlx4_bitmap bitmap; +}; + +struct mlx4_mr_table { + struct mlx4_bitmap mpt_bitmap; + struct mlx4_buddy mtt_buddy; + u64 mtt_base; + u64 mpt_base; + struct mlx4_icm_table mtt_table; + struct mlx4_icm_table dmpt_table; +}; + +struct mlx4_cq_table { + struct mlx4_bitmap bitmap; + spinlock_t lock; + struct radix_tree_root tree; + struct mlx4_icm_table table; + struct mlx4_icm_table cmpt_table; +}; + +struct mlx4_eq_table { + struct mlx4_bitmap bitmap; + void __iomem *clr_int; + u8 __iomem *uar_map[(MLX4_NUM_EQ + 6) / 4]; + u32 clr_mask; + struct mlx4_eq eq[MLX4_NUM_EQ]; + u64 icm_virt; + dma_addr_t icm_page; + dma_addr_t icm_dma; + struct mlx4_icm_table cmpt_table; + int have_irq; + u8 inta_pin; +}; + +struct mlx4_srq_table { + struct mlx4_bitmap bitmap; + spinlock_t lock; + struct radix_tree_root tree; + struct mlx4_icm_table table; + struct mlx4_icm_table cmpt_table; +}; + +struct mlx4_qp_table { + struct mlx4_bitmap bitmap; + u32 rdmarc_base; + int rdmarc_shift; + spinlock_t lock; + struct mlx4_icm_table qp_table; + struct mlx4_icm_table auxc_table; + struct mlx4_icm_table altc_table; + struct mlx4_icm_table rdmarc_table; + struct mlx4_icm_table cmpt_table; +}; + +struct mlx4_mcg_table { + struct mutex mutex; + struct mlx4_bitmap bitmap; + struct mlx4_icm_table table; +}; + +struct mlx4_catas_err { + u32 __iomem *map; + struct list_head list; + /* Windows */ + int stop; + KTIMER timer; + KDPC timer_dpc; + LARGE_INTEGER interval; +}; + +struct mlx4_priv { + struct mlx4_dev dev; + + struct list_head dev_list; + struct list_head ctx_list; + spinlock_t ctx_lock; + + struct mlx4_fw fw; + struct mlx4_cmd cmd; + + struct mlx4_bitmap pd_bitmap; + struct mlx4_uar_table uar_table; + struct mlx4_mr_table mr_table; + struct mlx4_cq_table cq_table; + struct mlx4_eq_table eq_table; + struct mlx4_srq_table srq_table; + struct mlx4_qp_table qp_table; + struct mlx4_mcg_table mcg_table; + + struct mlx4_catas_err catas_err; + + u8 __iomem *clr_base; + + struct mlx4_uar driver_uar; + void __iomem *kar; +}; + +static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev) +{ + return container_of(dev, struct mlx4_priv, dev); +} + +u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap); +void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj); +int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, u32 reserved); +void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap); + +int mlx4_init_pd_table(struct mlx4_dev *dev); +int mlx4_init_uar_table(struct mlx4_dev *dev); +int mlx4_init_mr_table(struct mlx4_dev *dev); +int mlx4_init_eq_table(struct mlx4_dev *dev); +int mlx4_init_cq_table(struct mlx4_dev *dev); +int mlx4_init_qp_table(struct mlx4_dev *dev); +int mlx4_init_srq_table(struct mlx4_dev *dev); +int mlx4_init_mcg_table(struct mlx4_dev *dev); + +void mlx4_cleanup_pd_table(struct mlx4_dev *dev); +void mlx4_cleanup_uar_table(struct mlx4_dev *dev); +void mlx4_cleanup_mr_table(struct mlx4_dev *dev); +void mlx4_cleanup_eq_table(struct mlx4_dev *dev); +void mlx4_cleanup_cq_table(struct 
mlx4_dev *dev); +void mlx4_cleanup_qp_table(struct mlx4_dev *dev); +void mlx4_cleanup_srq_table(struct mlx4_dev *dev); +void mlx4_cleanup_mcg_table(struct mlx4_dev *dev); + +void mlx4_start_catas_poll(struct mlx4_dev *dev); +void mlx4_stop_catas_poll(struct mlx4_dev *dev); +int mlx4_restart_one(struct pci_dev *pdev); +int mlx4_register_device(struct mlx4_dev *dev); +void mlx4_unregister_device(struct mlx4_dev *dev); +void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_event type, + int subtype, int port); +void mlx4_intf_init(); +void mlx4_net_init(); + +struct mlx4_dev_cap; +struct mlx4_init_hca_param; + +u64 mlx4_make_profile(struct mlx4_dev *dev, + struct mlx4_profile *request, + struct mlx4_dev_cap *dev_cap, + struct mlx4_init_hca_param *init_hca); + +int mlx4_map_eq_icm(struct mlx4_dev *dev, u64 icm_virt); +void mlx4_unmap_eq_icm(struct mlx4_dev *dev); + +int mlx4_cmd_init(struct mlx4_dev *dev); +void mlx4_cmd_cleanup(struct mlx4_dev *dev); +void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param); +int mlx4_cmd_use_events(struct mlx4_dev *dev); +void mlx4_cmd_use_polling(struct mlx4_dev *dev); + +void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn); +void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type); + +void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type); + +void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type); + +void mlx4_handle_catas_err(struct mlx4_dev *dev); + +int mlx4_init_one(struct pci_dev *pdev); + +void mlx4_remove_one(struct pci_dev *pdev); + + +#endif /* MLX4_H */ diff --git a/branches/ConnectX/hw/mlx4/kernel/net/mr.c b/branches/ConnectX/hw/mlx4/kernel/net/mr.c new file mode 100644 index 00000000..51e5a0b7 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/net/mr.c @@ -0,0 +1,635 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#include "mlx4.h" +#include "cmd.h" +#include "icm.h" + +/* + * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits. 
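+ * The layout mirrors the hardware MPT entry: mlx4_mr_enable() fills it in
+ * and hands it to the device with the SW2HW_MPT command.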
+ */ +#pragma pack(push,1) +struct mlx4_mpt_entry { + __be32 flags; + __be32 qpn; + __be32 key; + __be32 pd; + __be64 start; + __be64 length; + __be32 lkey; + __be32 win_cnt; + u8 reserved1[3]; + u8 mtt_rep; + __be64 mtt_seg; + __be32 mtt_sz; + __be32 entity_size; + __be32 first_byte_offset; +} __attribute__((packed)); +#pragma pack(pop) + +#define MLX4_MPT_FLAG_SW_OWNS (0xfUL << 28) +#define MLX4_MPT_FLAG_MIO (1 << 17) +#define MLX4_MPT_FLAG_BIND_ENABLE (1 << 15) +#define MLX4_MPT_FLAG_PHYSICAL (1 << 9) +#define MLX4_MPT_FLAG_REGION (1 << 8) + +#define MLX4_MTT_FLAG_PRESENT 1 + +#define MLX4_MPT_STATUS_SW 0xF0 +#define MLX4_MPT_STATUS_HW 0x00 + +static u32 mlx4_buddy_alloc(struct mlx4_buddy *buddy, int order) +{ + int o; + int m; + u32 seg; + + spin_lock(&buddy->lock); + + for (o = order; o <= buddy->max_order; ++o) { + m = 1 << (buddy->max_order - o); + seg = find_first_bit(buddy->bits[o], m); + if (seg < (u32)m) + goto found; + } + + spin_unlock(&buddy->lock); + return (u32)-1; + + found: + clear_bit(seg, buddy->bits[o]); + + while (o > order) { + --o; + seg <<= 1; + set_bit(seg ^ 1, buddy->bits[o]); + } + + spin_unlock(&buddy->lock); + + seg <<= order; + + return seg; +} + +static void mlx4_buddy_free(struct mlx4_buddy *buddy, u32 seg, int order) +{ + seg >>= order; + + spin_lock(&buddy->lock); + + while (test_bit(seg ^ 1, buddy->bits[order])) { + clear_bit(seg ^ 1, buddy->bits[order]); + seg >>= 1; + ++order; + } + + set_bit(seg, buddy->bits[order]); + + spin_unlock(&buddy->lock); +} + +static int __devinit mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order) +{ + int i, s; + + buddy->max_order = max_order; + spin_lock_init(&buddy->lock); + + buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *), + GFP_KERNEL); + if (!buddy->bits) + goto err_out; + + for (i = 0; i <= buddy->max_order; ++i) { + s = BITS_TO_LONGS(1 << (buddy->max_order - i)); + buddy->bits[i] = kmalloc(s * sizeof (long), GFP_KERNEL); + if (!buddy->bits[i]) + goto err_out_free; + bitmap_zero(buddy->bits[i], 1 << (buddy->max_order - i)); + } + + set_bit(0, buddy->bits[buddy->max_order]); + + return 0; + +err_out_free: + for (i = 0; i <= buddy->max_order; ++i) + kfree(buddy->bits[i]); + + kfree(buddy->bits); + +err_out: + return -ENOMEM; +} + +static void mlx4_buddy_cleanup(struct mlx4_buddy *buddy) +{ + int i; + + for (i = 0; i <= buddy->max_order; ++i) + kfree(buddy->bits[i]); + + kfree(buddy->bits); +} + +static u32 mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order) +{ + struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table; + u32 seg; + + seg = mlx4_buddy_alloc(&mr_table->mtt_buddy, order); + if (seg == -1) + return (u32)-1; + + if (mlx4_table_get_range(dev, &mr_table->mtt_table, seg, + seg + (1 << order) - 1)) { + mlx4_buddy_free(&mr_table->mtt_buddy, seg, order); + return (u32)-1; + } + + return seg; +} + +int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift, + struct mlx4_mtt *mtt) +{ + int i; + + if (!npages) { + mtt->order = -1; + mtt->page_shift = MLX4_ICM_PAGE_SHIFT; + return 0; + } else + mtt->page_shift = page_shift; + + for (mtt->order = 0, i = MLX4_MTT_ENTRY_PER_SEG; i < npages; i <<= 1) + ++mtt->order; + + mtt->first_seg = mlx4_alloc_mtt_range(dev, mtt->order); + if (mtt->first_seg == -1) + return -ENOMEM; + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_mtt_init); + +void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt) +{ + struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table; + + if (mtt->order < 0) + return; + + mlx4_buddy_free(&mr_table->mtt_buddy, 
mtt->first_seg, mtt->order); + mlx4_table_put_range(dev, &mr_table->mtt_table, mtt->first_seg, + mtt->first_seg + (1 << mtt->order) - 1); +} +EXPORT_SYMBOL_GPL(mlx4_mtt_cleanup); + +u64 mlx4_mtt_addr(struct mlx4_dev *dev, struct mlx4_mtt *mtt) +{ + return (u64) mtt->first_seg * dev->caps.mtt_entry_sz; +} +EXPORT_SYMBOL_GPL(mlx4_mtt_addr); + +static u32 hw_index_to_key(u32 ind) +{ + return (ind >> 24) | (ind << 8); +} + +static u32 key_to_hw_index(u32 key) +{ + return (key << 24) | (key >> 8); +} + +static int mlx4_SW2HW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, + int mpt_index) +{ + return mlx4_cmd(dev, mailbox->dma.da, mpt_index, 0, MLX4_CMD_SW2HW_MPT, + MLX4_CMD_TIME_CLASS_B); +} + +static int mlx4_HW2SW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, + int mpt_index) +{ + return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma.da : 0, mpt_index, + (u8)!mailbox, MLX4_CMD_HW2SW_MPT, MLX4_CMD_TIME_CLASS_B); +} + +int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access, + int npages, int page_shift, struct mlx4_mr *mr) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + u32 index; + int err; + + index = mlx4_bitmap_alloc(&priv->mr_table.mpt_bitmap); + if (index == -1) + return -ENOMEM; + + mr->iova = iova; + mr->size = size; + mr->pd = pd; + mr->access = access; + mr->enabled = 0; + mr->key = hw_index_to_key(index); + + err = mlx4_mtt_init(dev, npages, page_shift, &mr->mtt); + if (err) + mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, index); + + return err; +} +EXPORT_SYMBOL_GPL(mlx4_mr_alloc); + +void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int err; + + if (mr->enabled) { + err = mlx4_HW2SW_MPT(dev, NULL, + key_to_hw_index(mr->key) & + (dev->caps.num_mpts - 1)); + if (err) + mlx4_warn(dev, "HW2SW_MPT failed (%d)\n", err); + } + + mlx4_mtt_cleanup(dev, &mr->mtt); + mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, key_to_hw_index(mr->key)); +} +EXPORT_SYMBOL_GPL(mlx4_mr_free); + +int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr) +{ + struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table; + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_mpt_entry *mpt_entry; + int err; + + err = mlx4_table_get(dev, &mr_table->dmpt_table, key_to_hw_index(mr->key)); + if (err) + return err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) { + err = PTR_ERR(mailbox); + goto err_table; + } + mpt_entry = mailbox->buf; + + memset(mpt_entry, 0, sizeof *mpt_entry); + + mpt_entry->flags = cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS | + MLX4_MPT_FLAG_MIO | + MLX4_MPT_FLAG_REGION | + mr->access); + + mpt_entry->key = cpu_to_be32(key_to_hw_index(mr->key)); + mpt_entry->pd = cpu_to_be32(mr->pd); + mpt_entry->start = cpu_to_be64(mr->iova); + mpt_entry->length = cpu_to_be64(mr->size); + mpt_entry->entity_size = cpu_to_be32(mr->mtt.page_shift); + if (mr->mtt.order < 0) { + mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL); + mpt_entry->mtt_seg = 0; + } else + mpt_entry->mtt_seg = cpu_to_be64(mlx4_mtt_addr(dev, &mr->mtt)); + + err = mlx4_SW2HW_MPT(dev, mailbox, + key_to_hw_index(mr->key) & (dev->caps.num_mpts - 1)); + if (err) { + mlx4_warn(dev, "SW2HW_MPT failed (%d)\n", err); + goto err_cmd; + } + + mr->enabled = 1; + + mlx4_free_cmd_mailbox(dev, mailbox); + + return 0; + +err_cmd: + mlx4_free_cmd_mailbox(dev, mailbox); + +err_table: + mlx4_table_put(dev, &mr_table->dmpt_table, key_to_hw_index(mr->key)); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_mr_enable); + +static int 
mlx4_write_mtt_chunk(struct mlx4_dev *dev, struct mlx4_mtt *mtt, + int start_index, int npages, u64 *page_list) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + __be64 *mtts; + dma_addr_t dma_handle; + int i; + int s = start_index * sizeof (u64); + + /* All MTTs must fit in the same page */ + if (start_index / (PAGE_SIZE / sizeof (u64)) != + (start_index + npages - 1) / (PAGE_SIZE / sizeof (u64))) + return -EINVAL; + + if (start_index & (MLX4_MTT_ENTRY_PER_SEG - 1)) + return -EINVAL; + + mtts = mlx4_table_find(&priv->mr_table.mtt_table, mtt->first_seg + + s / dev->caps.mtt_entry_sz, &dma_handle); + if (!mtts) + return -ENOMEM; + + for (i = 0; i < npages; ++i) + mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT); + + dma_sync_single(&dev->pdev->dev, dma_handle, npages * sizeof (u64), DMA_TO_DEVICE); + + return 0; +} + +int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, + int start_index, int npages, u64 *page_list) +{ + int chunk; + int err; + + if (mtt->order < 0) + return -EINVAL; + + while (npages > 0) { + chunk = min_t(int, PAGE_SIZE / sizeof(u64), npages); + err = mlx4_write_mtt_chunk(dev, mtt, start_index, chunk, page_list); + if (err) + return err; + + npages -= chunk; + start_index += chunk; + page_list += chunk; + } + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_write_mtt); + +int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, + struct mlx4_buf *buf) +{ + u64 *page_list; + int err; + int i; + + page_list = kmalloc(buf->npages * sizeof *page_list, GFP_KERNEL); + if (!page_list) + return -ENOMEM; + + for (i = 0; i < buf->npages; ++i) + if (buf->nbufs == 1) + page_list[i] = buf->u.direct.map.da + (i << buf->page_shift); + else + page_list[i] = buf->u.page_list[i].map.da; + + err = mlx4_write_mtt(dev, mtt, 0, buf->npages, page_list); + + kfree(page_list); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_buf_write_mtt); + +int mlx4_init_mr_table(struct mlx4_dev *dev) +{ + struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table; + int err; + + err = mlx4_bitmap_init(&mr_table->mpt_bitmap, dev->caps.num_mpts, + (u32)~0, (u32)dev->caps.reserved_mrws); + if (err) + return err; + + err = mlx4_buddy_init(&mr_table->mtt_buddy, + ilog2(dev->caps.num_mtt_segs)); + if (err) + goto err_buddy; + + if (dev->caps.reserved_mtts) { + if (mlx4_alloc_mtt_range(dev, fls(dev->caps.reserved_mtts - 1)) == -1) { + mlx4_warn(dev, "MTT table of order %d is too small.\n", + mr_table->mtt_buddy.max_order); + err = -ENOMEM; + goto err_reserve_mtts; + } + } + + return 0; + +err_reserve_mtts: + mlx4_buddy_cleanup(&mr_table->mtt_buddy); + +err_buddy: + mlx4_bitmap_cleanup(&mr_table->mpt_bitmap); + + return err; +} + +void mlx4_cleanup_mr_table(struct mlx4_dev *dev) +{ + struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table; + + mlx4_buddy_cleanup(&mr_table->mtt_buddy); + mlx4_bitmap_cleanup(&mr_table->mpt_bitmap); +} + +static inline int mlx4_check_fmr(struct mlx4_fmr *fmr, u64 *page_list, + int npages, u64 iova) +{ + int i, page_mask; + + if (npages > fmr->max_pages) + return -EINVAL; + + page_mask = (1 << fmr->page_shift) - 1; + + /* We are getting page lists, so va must be page aligned. 
*/ + if (iova & page_mask) + return -EINVAL; + + /* Trust the user not to pass misaligned data in page_list */ + if (!fmr) /* instead of 0, that is warned by compiler */ + for (i = 0; i < npages; ++i) { + if (page_list[i] & ~page_mask) + return -EINVAL; + } + + if (fmr->maps >= fmr->max_maps) + return -EINVAL; + + return 0; +} + +int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list, + int npages, u64 iova, u32 *lkey, u32 *rkey) +{ + u32 key; + int i, err; + + err = mlx4_check_fmr(fmr, page_list, npages, iova); + if (err) + return err; + + ++fmr->maps; + + key = key_to_hw_index(fmr->mr.key); + key += dev->caps.num_mpts; + *lkey = *rkey = fmr->mr.key = hw_index_to_key(key); + + *(u8 *) fmr->mpt = MLX4_MPT_STATUS_SW; + + /* Make sure MPT status is visible before writing MTT entries */ + wmb(); + + for (i = 0; i < npages; ++i) + fmr->mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT); + + dma_sync_single(&dev->pdev->dev, fmr->dma_handle, + npages * sizeof(u64), DMA_TO_DEVICE); + + fmr->mpt->key = cpu_to_be32(key); + fmr->mpt->lkey = cpu_to_be32(key); + fmr->mpt->length = cpu_to_be64(npages * (1ull << fmr->page_shift)); + fmr->mpt->start = cpu_to_be64(iova); + + /* Make MTT entries are visible before setting MPT status */ + wmb(); + + *(u8 *) fmr->mpt = MLX4_MPT_STATUS_HW; + + /* Make sure MPT status is visible before consumer can use FMR */ + wmb(); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_map_phys_fmr); + +int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages, + int max_maps, u8 page_shift, struct mlx4_fmr *fmr) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + u64 mtt_seg; + int err = -ENOMEM; + + if (page_shift < 12 || page_shift >= 32) + return -EINVAL; + + /* All MTTs must fit in the same page */ + if (max_pages * sizeof *fmr->mtts > PAGE_SIZE) + return -EINVAL; + + fmr->page_shift = page_shift; + fmr->max_pages = max_pages; + fmr->max_maps = max_maps; + fmr->maps = 0; + + err = mlx4_mr_alloc(dev, pd, 0, 0, access, max_pages, + page_shift, &fmr->mr); + if (err) + return err; + + mtt_seg = fmr->mr.mtt.first_seg * dev->caps.mtt_entry_sz; + + fmr->mtts = mlx4_table_find(&priv->mr_table.mtt_table, + fmr->mr.mtt.first_seg, + &fmr->dma_handle); + if (!fmr->mtts) { + err = -ENOMEM; + goto err_free; + } + + fmr->mpt = mlx4_table_find(&priv->mr_table.dmpt_table, + key_to_hw_index(fmr->mr.key), NULL); + if (!fmr->mpt) { + err = -ENOMEM; + goto err_free; + } + + return 0; + +err_free: + mlx4_mr_free(dev, &fmr->mr); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_fmr_alloc); + +int mlx4_fmr_enable(struct mlx4_dev *dev, struct mlx4_fmr *fmr) +{ + return mlx4_mr_enable(dev, &fmr->mr); +} +EXPORT_SYMBOL_GPL(mlx4_fmr_enable); + +void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr, + u32 *lkey, u32 *rkey) +{ + u32 key; + + if (!fmr->maps) + return; + + key = key_to_hw_index(fmr->mr.key); + key &= dev->caps.num_mpts - 1; + *lkey = *rkey = fmr->mr.key = hw_index_to_key(key); + + fmr->maps = 0; + + *(u8 *) fmr->mpt = MLX4_MPT_STATUS_SW; +} +EXPORT_SYMBOL_GPL(mlx4_fmr_unmap); + +int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr) +{ + if (fmr->maps) + return -EBUSY; + + fmr->mr.enabled = 0; + mlx4_mr_free(dev, &fmr->mr); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_fmr_free); + +int mlx4_SYNC_TPT(struct mlx4_dev *dev) +{ + return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_SYNC_TPT, 1000); +} +EXPORT_SYMBOL_GPL(mlx4_SYNC_TPT); diff --git a/branches/ConnectX/hw/mlx4/kernel/net/net.def b/branches/ConnectX/hw/mlx4/kernel/net/net.def new file mode 100644 index 
00000000..04393934 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/net/net.def @@ -0,0 +1,16 @@ +LIBRARY mlx4_net.lib + +EXPORTS +; DllInitialize and DllUnload must be exported for the OS reference counting to +; work, and must be private for the compiler to accept them. +DllInitialize private +DllUnload private + +; main.c +mlx4_init_one +mlx4_remove_one +mlx4_net_init + +; alloc.c +mlx4_buf_alloc +mlx4_buf_free \ No newline at end of file diff --git a/branches/ConnectX/hw/mlx4/kernel/net/net.rc b/branches/ConnectX/hw/mlx4/kernel/net/net.rc new file mode 100644 index 00000000..61840a0b --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/net/net.rc @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ibal.rc 1611 2006-08-20 14:48:55Z sleybo $ + */ + + +#include + +#define VER_FILETYPE VFT_DRV +#define VER_FILESUBTYPE VFT2_UNKNOWN + +#ifdef _DEBUG_ +#define VER_FILEDESCRIPTION_STR "MLX4 Common HW Services (Debug)" +#else +#define VER_FILEDESCRIPTION_STR "MLX4 Common HW Services" +#endif + +#define VER_INTERNALNAME_STR "mlx4_net.lib" +#define VER_ORIGINALFILENAME_STR "mlx4_net.lib" + +#include diff --git a/branches/ConnectX/hw/mlx4/kernel/net/pd.c b/branches/ConnectX/hw/mlx4/kernel/net/pd.c new file mode 100644 index 00000000..3009a9e8 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/net/pd.c @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#include "mlx4.h" +#include "icm.h" + +int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + *pdn = mlx4_bitmap_alloc(&priv->pd_bitmap); + if (*pdn == -1) + return -ENOMEM; + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_pd_alloc); + +void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn) +{ + mlx4_bitmap_free(&mlx4_priv(dev)->pd_bitmap, pdn); +} +EXPORT_SYMBOL_GPL(mlx4_pd_free); + +int mlx4_init_pd_table(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + return mlx4_bitmap_init(&priv->pd_bitmap, dev->caps.num_pds, + (1 << 24) - 1, dev->caps.reserved_pds); +} + +void mlx4_cleanup_pd_table(struct mlx4_dev *dev) +{ + mlx4_bitmap_cleanup(&mlx4_priv(dev)->pd_bitmap); +} + + +int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar) +{ + uar->index = mlx4_bitmap_alloc(&mlx4_priv(dev)->uar_table.bitmap); + if (uar->index == -1) + return -ENOMEM; + + uar->pfn = (u32)((pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) + uar->index); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_uar_alloc); + +void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar) +{ + mlx4_bitmap_free(&mlx4_priv(dev)->uar_table.bitmap, uar->index); +} +EXPORT_SYMBOL_GPL(mlx4_uar_free); + +int mlx4_init_uar_table(struct mlx4_dev *dev) +{ + return mlx4_bitmap_init(&mlx4_priv(dev)->uar_table.bitmap, + dev->caps.num_uars, dev->caps.num_uars - 1, + max(128, dev->caps.reserved_uars)); +} + +void mlx4_cleanup_uar_table(struct mlx4_dev *dev) +{ + mlx4_bitmap_cleanup(&mlx4_priv(dev)->uar_table.bitmap); +} diff --git a/branches/ConnectX/hw/mlx4/kernel/net/profile.c b/branches/ConnectX/hw/mlx4/kernel/net/profile.c new file mode 100644 index 00000000..4f14c0fe --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/net/profile.c @@ -0,0 +1,236 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "mlx4.h" +#include "fw.h" + +enum { + MLX4_RES_QP, + MLX4_RES_RDMARC, + MLX4_RES_ALTC, + MLX4_RES_AUXC, + MLX4_RES_SRQ, + MLX4_RES_CQ, + MLX4_RES_EQ, + MLX4_RES_DMPT, + MLX4_RES_CMPT, + MLX4_RES_MTT, + MLX4_RES_MCG, + MLX4_RES_NUM +}; + +static const char *res_name[] = { + "QP", /* [MLX4_RES_QP] */ + "RDMARC", /* [MLX4_RES_RDMARC] */ + "ALTC", /* [MLX4_RES_ALTC] */ + "AUXC", /* [MLX4_RES_AUXC] */ + "SRQ", /* [MLX4_RES_SRQ] */ + "CQ", /* [MLX4_RES_CQ] */ + "EQ", /* [MLX4_RES_EQ] */ + "DMPT", /* [MLX4_RES_DMPT] */ + "CMPT", /* [MLX4_RES_CMPT] */ + "MTT", /* [MLX4_RES_MTT] */ + "MCG" /* [MLX4_RES_MCG] */ +}; + +u64 mlx4_make_profile(struct mlx4_dev *dev, + struct mlx4_profile *request, + struct mlx4_dev_cap *dev_cap, + struct mlx4_init_hca_param *init_hca) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource { + u64 size; + u64 start; + int type; + int num; + int log_num; + }; + + u64 total_size = 0; + struct mlx4_resource *profile; + struct mlx4_resource tmp; + int i, j; + + profile = kzalloc(MLX4_RES_NUM * sizeof *profile, GFP_KERNEL); + if (!profile) + return (u64)-ENOMEM; + + profile[MLX4_RES_QP].size = dev_cap->qpc_entry_sz; + profile[MLX4_RES_RDMARC].size = dev_cap->rdmarc_entry_sz; + profile[MLX4_RES_ALTC].size = dev_cap->altc_entry_sz; + profile[MLX4_RES_AUXC].size = dev_cap->aux_entry_sz; + profile[MLX4_RES_SRQ].size = dev_cap->srq_entry_sz; + profile[MLX4_RES_CQ].size = dev_cap->cqc_entry_sz; + profile[MLX4_RES_EQ].size = dev_cap->eqc_entry_sz; + profile[MLX4_RES_DMPT].size = dev_cap->dmpt_entry_sz; + profile[MLX4_RES_CMPT].size = dev_cap->cmpt_entry_sz; + profile[MLX4_RES_MTT].size = MLX4_MTT_ENTRY_PER_SEG * dev_cap->mtt_entry_sz; + profile[MLX4_RES_MCG].size = MLX4_MGM_ENTRY_SIZE; + + profile[MLX4_RES_QP].num = request->num_qp; + profile[MLX4_RES_RDMARC].num = request->num_qp * request->rdmarc_per_qp; + profile[MLX4_RES_ALTC].num = request->num_qp; + profile[MLX4_RES_AUXC].num = request->num_qp; + profile[MLX4_RES_SRQ].num = request->num_srq; + profile[MLX4_RES_CQ].num = request->num_cq; + profile[MLX4_RES_EQ].num = MLX4_NUM_EQ + dev_cap->reserved_eqs; + profile[MLX4_RES_DMPT].num = request->num_mpt; + profile[MLX4_RES_CMPT].num = MLX4_NUM_CMPTS; + profile[MLX4_RES_MTT].num = request->num_mtt; + profile[MLX4_RES_MCG].num = request->num_mcg; + + for (i = 0; i < MLX4_RES_NUM; ++i) { + profile[i].type = i; + profile[i].num = roundup_pow_of_two(profile[i].num); + profile[i].log_num = ilog2(profile[i].num); + profile[i].size *= profile[i].num; + profile[i].size = max(profile[i].size, (u64) PAGE_SIZE); + } + + /* + * Sort the resources in decreasing order of size. Since they + * all have sizes that are powers of 2, we'll be able to keep + * resources aligned to their size and pack them without gaps + * using the sorted order. 
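+	 * A simple in-place bubble sort is sufficient here, since only the
+	 * MLX4_RES_NUM entries of the profile array have to be ordered.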
+ */ + for (i = MLX4_RES_NUM; i > 0; --i) + for (j = 1; j < i; ++j) { + if (profile[j].size > profile[j - 1].size) { + tmp = profile[j]; + profile[j] = profile[j - 1]; + profile[j - 1] = tmp; + } + } + + for (i = 0; i < MLX4_RES_NUM; ++i) { + if (profile[i].size) { + profile[i].start = total_size; + total_size += profile[i].size; + } + + if (total_size > dev_cap->max_icm_sz) { + mlx4_err(dev, "Profile requires 0x%llx bytes; " + "won't fit in 0x%llx bytes of context memory.\n", + (unsigned long long) total_size, + (unsigned long long) dev_cap->max_icm_sz); + kfree(profile); + return (u64)-ENOMEM; + } + + if (profile[i].size) + mlx4_dbg(dev, " profile[%2d] (%6s): 2^%02d entries @ 0x%10llx, " + "size 0x%10llx\n", + i, res_name[profile[i].type], profile[i].log_num, + (unsigned long long) profile[i].start, + (unsigned long long) profile[i].size); + } + + mlx4_dbg(dev, "HCA context memory: reserving %d KB\n", + (int) (total_size >> 10)); + + for (i = 0; i < MLX4_RES_NUM; ++i) { + switch (profile[i].type) { + case MLX4_RES_QP: + dev->caps.num_qps = profile[i].num; + init_hca->qpc_base = profile[i].start; + init_hca->log_num_qps = (u8)profile[i].log_num; + break; + case MLX4_RES_RDMARC: + for (priv->qp_table.rdmarc_shift = 0; + request->num_qp << priv->qp_table.rdmarc_shift < profile[i].num; + ++priv->qp_table.rdmarc_shift) + ; /* nothing */ + dev->caps.max_qp_dest_rdma = 1 << priv->qp_table.rdmarc_shift; + priv->qp_table.rdmarc_base = (u32) profile[i].start; + init_hca->rdmarc_base = profile[i].start; + init_hca->log_rd_per_qp = (u8)priv->qp_table.rdmarc_shift; + break; + case MLX4_RES_ALTC: + init_hca->altc_base = profile[i].start; + break; + case MLX4_RES_AUXC: + init_hca->auxc_base = profile[i].start; + break; + case MLX4_RES_SRQ: + dev->caps.num_srqs = profile[i].num; + init_hca->srqc_base = profile[i].start; + init_hca->log_num_srqs = (u8)profile[i].log_num; + break; + case MLX4_RES_CQ: + dev->caps.num_cqs = profile[i].num; + init_hca->cqc_base = profile[i].start; + init_hca->log_num_cqs = (u8)profile[i].log_num; + break; + case MLX4_RES_EQ: + dev->caps.num_eqs = profile[i].num; + init_hca->eqc_base = profile[i].start; + init_hca->log_num_eqs = (u8)profile[i].log_num; + break; + case MLX4_RES_DMPT: + dev->caps.num_mpts = profile[i].num; + priv->mr_table.mpt_base = profile[i].start; + init_hca->dmpt_base = profile[i].start; + init_hca->log_mpt_sz = (u8)profile[i].log_num; + break; + case MLX4_RES_CMPT: + init_hca->cmpt_base = profile[i].start; + break; + case MLX4_RES_MTT: + dev->caps.num_mtt_segs = profile[i].num; + priv->mr_table.mtt_base = profile[i].start; + init_hca->mtt_base = profile[i].start; + break; + case MLX4_RES_MCG: + dev->caps.num_mgms = profile[i].num >> 1; + dev->caps.num_amgms = profile[i].num >> 1; + init_hca->mc_base = profile[i].start; + init_hca->log_mc_entry_sz = (u16)ilog2(MLX4_MGM_ENTRY_SIZE); + init_hca->log_mc_table_sz = (u8)profile[i].log_num; + init_hca->log_mc_hash_sz = (u16)profile[i].log_num - 1; + break; + default: + break; + } + } + + /* + * PDs don't take any HCA memory, but we assign them as part + * of the HCA profile anyway. + */ + dev->caps.num_pds = MLX4_NUM_PDS; + + kfree(profile); + return total_size; +} diff --git a/branches/ConnectX/hw/mlx4/kernel/net/qp.c b/branches/ConnectX/hw/mlx4/kernel/net/qp.c new file mode 100644 index 00000000..27589354 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/net/qp.c @@ -0,0 +1,301 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. 
All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#include "mlx4.h" +#include "icm.h" +#include "cmd.h" +#include "qp.h" + +void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type) +{ + struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; + struct mlx4_qp *qp; + + spin_lock_dpc(&qp_table->lock); + + qp = __mlx4_qp_lookup(dev, qpn); + if (qp) + atomic_inc(&qp->refcount); + + spin_unlock_dpc(&qp_table->lock); + + if (!qp) { + if (event_type == MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE) + mlx4_dbg(dev, "Async event SRQ_QP_LAST_WQE" + " for bogus QP %08x\n", qpn); + else + mlx4_warn(dev, "Async event %d for bogus QP %08x\n", + event_type, qpn); + return; + } + + qp->event(qp, event_type); + + if (atomic_dec_and_test(&qp->refcount)) + complete(&qp->free); +} + +int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, + enum mlx4_qp_state cur_state, enum mlx4_qp_state new_state, + struct mlx4_qp_context *context, enum mlx4_qp_optpar optpar, + int sqd_event, struct mlx4_qp *qp) +{ + struct mlx4_cmd_mailbox *mailbox; + int ret = 0; + static u16 op[MLX4_QP_NUM_STATE][MLX4_QP_NUM_STATE]; + static int op_inited = 0; + + if (!op_inited) { + op[MLX4_QP_STATE_RST][MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP; + op[MLX4_QP_STATE_RST][MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP; + op[MLX4_QP_STATE_RST][MLX4_QP_STATE_INIT] = MLX4_CMD_RST2INIT_QP; + + op[MLX4_QP_STATE_INIT][MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP; + op[MLX4_QP_STATE_INIT][MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP; + op[MLX4_QP_STATE_INIT][MLX4_QP_STATE_INIT] = MLX4_CMD_INIT2INIT_QP; + op[MLX4_QP_STATE_INIT][MLX4_QP_STATE_RTR] = MLX4_CMD_INIT2RTR_QP; + + op[MLX4_QP_STATE_RTR][MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP; + op[MLX4_QP_STATE_RTR][MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP; + op[MLX4_QP_STATE_RTR][MLX4_QP_STATE_RTS] = MLX4_CMD_RTR2RTS_QP; + + op[MLX4_QP_STATE_RTS][MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP; + op[MLX4_QP_STATE_RTS][MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP; + op[MLX4_QP_STATE_RTS][MLX4_QP_STATE_RTS] = MLX4_CMD_RTS2RTS_QP; + op[MLX4_QP_STATE_RTS][MLX4_QP_STATE_SQD] = MLX4_CMD_RTS2SQD_QP; + + op[MLX4_QP_STATE_SQD][MLX4_QP_STATE_RST] 
= MLX4_CMD_2RST_QP; + op[MLX4_QP_STATE_SQD][MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP; + op[MLX4_QP_STATE_SQD][MLX4_QP_STATE_RTS] = MLX4_CMD_SQD2RTS_QP; + op[MLX4_QP_STATE_SQD][MLX4_QP_STATE_SQD] = MLX4_CMD_SQD2SQD_QP; + + op[MLX4_QP_STATE_SQER][MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP; + op[MLX4_QP_STATE_SQER][MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP; + op[MLX4_QP_STATE_SQER][MLX4_QP_STATE_RTS] = MLX4_CMD_SQERR2RTS_QP; + + op[MLX4_QP_STATE_ERR][MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP; + op[MLX4_QP_STATE_ERR][MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP; + + op_inited = 1; + }; + + if (cur_state >= MLX4_QP_NUM_STATE || new_state >= MLX4_QP_NUM_STATE || + !op[cur_state][new_state]) + return -EINVAL; + + if (op[cur_state][new_state] == MLX4_CMD_2RST_QP) + return mlx4_cmd(dev, 0, qp->qpn, 2, + MLX4_CMD_2RST_QP, MLX4_CMD_TIME_CLASS_A); + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + if (cur_state == MLX4_QP_STATE_RST && new_state == MLX4_QP_STATE_INIT) { + u64 mtt_addr = mlx4_mtt_addr(dev, mtt); + context->mtt_base_addr_h = (u8)(mtt_addr >> 32); + context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff); + context->log_page_size = (u8)(mtt->page_shift - MLX4_ICM_PAGE_SHIFT); + } + + *(__be32 *) mailbox->buf = cpu_to_be32(optpar); + memcpy((u8*)mailbox->buf + 8, context, sizeof *context); + + ((struct mlx4_qp_context *) ((u8*)mailbox->buf + 8))->local_qpn = + cpu_to_be32(qp->qpn); + + ret = mlx4_cmd(dev, mailbox->dma.da, qp->qpn | (!!sqd_event << 31), + new_state == MLX4_QP_STATE_RST ? 2 : 0, + op[cur_state][new_state], MLX4_CMD_TIME_CLASS_C); + + mlx4_free_cmd_mailbox(dev, mailbox); + return ret; +} +EXPORT_SYMBOL_GPL(mlx4_qp_modify); + +int mlx4_qp_alloc(struct mlx4_dev *dev, int sqpn, struct mlx4_qp *qp) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_qp_table *qp_table = &priv->qp_table; + int err; + + if (sqpn) + qp->qpn = sqpn; + else { + qp->qpn = mlx4_bitmap_alloc(&qp_table->bitmap); + if (qp->qpn == -1) + return -ENOMEM; + } + + err = mlx4_table_get(dev, &qp_table->qp_table, qp->qpn); + if (err) + goto err_out; + + err = mlx4_table_get(dev, &qp_table->auxc_table, qp->qpn); + if (err) + goto err_put_qp; + + err = mlx4_table_get(dev, &qp_table->altc_table, qp->qpn); + if (err) + goto err_put_auxc; + + err = mlx4_table_get(dev, &qp_table->rdmarc_table, qp->qpn); + if (err) + goto err_put_altc; + + err = mlx4_table_get(dev, &qp_table->cmpt_table, qp->qpn); + if (err) + goto err_put_rdmarc; + + spin_lock_irq(&qp_table->lock); + err = radix_tree_insert(&dev->qp_table_tree, qp->qpn & (dev->caps.num_qps - 1), qp); + spin_unlock_irq(&qp_table->lock); + if (err) + goto err_put_cmpt; + + atomic_set(&qp->refcount, 1); + init_completion(&qp->free); + + return 0; + +err_put_cmpt: + mlx4_table_put(dev, &qp_table->cmpt_table, qp->qpn); + +err_put_rdmarc: + mlx4_table_put(dev, &qp_table->rdmarc_table, qp->qpn); + +err_put_altc: + mlx4_table_put(dev, &qp_table->altc_table, qp->qpn); + +err_put_auxc: + mlx4_table_put(dev, &qp_table->auxc_table, qp->qpn); + +err_put_qp: + mlx4_table_put(dev, &qp_table->qp_table, qp->qpn); + +err_out: + if (!sqpn) + mlx4_bitmap_free(&qp_table->bitmap, qp->qpn); + + return err; +} +EXPORT_SYMBOL_GPL(mlx4_qp_alloc); + +void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp) +{ + struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; + unsigned long flags; + + spin_lock_irqsave(&qp_table->lock, &flags); + radix_tree_delete(&dev->qp_table_tree, qp->qpn & (dev->caps.num_qps - 1)); + 
spin_unlock_irqrestore(&qp_table->lock, flags); +} +EXPORT_SYMBOL_GPL(mlx4_qp_remove); + +void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp) +{ + struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; + + if (atomic_dec_and_test(&qp->refcount)) + complete(&qp->free); + wait_for_completion(&qp->free); + + mlx4_table_put(dev, &qp_table->cmpt_table, qp->qpn); + mlx4_table_put(dev, &qp_table->rdmarc_table, qp->qpn); + mlx4_table_put(dev, &qp_table->altc_table, qp->qpn); + mlx4_table_put(dev, &qp_table->auxc_table, qp->qpn); + mlx4_table_put(dev, &qp_table->qp_table, qp->qpn); + + if (qp->qpn >= dev->caps.sqp_start + 8) + mlx4_bitmap_free(&qp_table->bitmap, qp->qpn); +} +EXPORT_SYMBOL_GPL(mlx4_qp_free); + +static int mlx4_CONF_SPECIAL_QP(struct mlx4_dev *dev, u32 base_qpn) +{ + return mlx4_cmd(dev, 0, base_qpn, 0, MLX4_CMD_CONF_SPECIAL_QP, + MLX4_CMD_TIME_CLASS_B); +} + +int mlx4_init_qp_table(struct mlx4_dev *dev) +{ + struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; + int err; + + spin_lock_init(&qp_table->lock); + INIT_RADIX_TREE(&dev->qp_table_tree, GFP_ATOMIC); + + /* + * We reserve 2 extra QPs per port for the special QPs. The + * block of special QPs must be aligned to a multiple of 8, so + * round up. + */ + dev->caps.sqp_start = ALIGN(dev->caps.reserved_qps, 8); + err = mlx4_bitmap_init(&qp_table->bitmap, dev->caps.num_qps, + (1 << 24) - 1, dev->caps.sqp_start + 8); + if (err) + return err; + + return mlx4_CONF_SPECIAL_QP(dev, dev->caps.sqp_start); +} + +void mlx4_cleanup_qp_table(struct mlx4_dev *dev) +{ + mlx4_CONF_SPECIAL_QP(dev, 0); + mlx4_bitmap_cleanup(&mlx4_priv(dev)->qp_table.bitmap); + radix_tree_destroy(&dev->qp_table_tree); +} + +int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp, + struct mlx4_qp_context *context) +{ + struct mlx4_cmd_mailbox *mailbox; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + err = mlx4_cmd_box(dev, 0, mailbox->dma.da, qp->qpn, 0, + MLX4_CMD_QUERY_QP, MLX4_CMD_TIME_CLASS_A); + if (!err) + memcpy(context, (u8*)mailbox->buf + 8, sizeof *context); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_qp_query); + diff --git a/branches/ConnectX/hw/mlx4/kernel/net/reset.c b/branches/ConnectX/hw/mlx4/kernel/net/reset.c new file mode 100644 index 00000000..a46a3bf9 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/net/reset.c @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#include "mlx4.h" + +int mlx4_reset(struct mlx4_dev *dev) +{ + void __iomem *reset; + u32 *hca_header = NULL; + int pcie_cap; + u16 devctl; + u16 linkctl; + u16 vendor; + unsigned long end; + u32 sem; + int i; + int err = 0; + +#define MLX4_RESET_BASE 0xf0000 +#define MLX4_RESET_SIZE 0x400 +#define MLX4_SEM_OFFSET 0x3fc +#define MLX4_RESET_OFFSET 0x10 +#define MLX4_RESET_VALUE swab32(1) + +#define MLX4_SEM_TIMEOUT_JIFFIES (10 * HZ) +#define MLX4_RESET_TIMEOUT_JIFFIES (2 * HZ) + + /* + * Reset the chip. This is somewhat ugly because we have to + * save off the PCI header before reset and then restore it + * after the chip reboots. We skip config space offsets 22 + * and 23 since those have a special meaning. + */ + + /* Do we need to save off the full 4K PCI Express header?? */ + hca_header = kmalloc(256, GFP_KERNEL); + if (!hca_header) { + err = -ENOMEM; + mlx4_err(dev, "Couldn't allocate memory to save HCA " + "PCI header, aborting.\n"); + goto out; + } + + pcie_cap = pci_find_capability(dev->pdev, PCI_CAP_ID_EXP); + + for (i = 0; i < 64; ++i) { + if (i == 22 || i == 23) + continue; + if (pci_read_config_dword(dev->pdev, i * 4, hca_header + i)) { + err = -ENODEV; + mlx4_err(dev, "Couldn't save HCA " + "PCI header, aborting.\n"); + goto out; + } + } + + reset = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_RESET_BASE, + MLX4_RESET_SIZE); + if (!reset) { + err = -ENOMEM; + mlx4_err(dev, "Couldn't map HCA reset register, aborting.\n"); + goto out; + } + + /* grab HW semaphore to lock out flash updates */ + end = jiffies + MLX4_SEM_TIMEOUT_JIFFIES; + do { + sem = readl(reset + MLX4_SEM_OFFSET); + if (!sem) + break; + + msleep(1); + } while (time_before(jiffies, end)); + + if (sem) { + mlx4_err(dev, "Failed to obtain HW semaphore, aborting\n"); + err = -EAGAIN; + iounmap(reset, MLX4_RESET_SIZE); + goto out; + } + + /* actually hit reset */ + writel(MLX4_RESET_VALUE, reset + MLX4_RESET_OFFSET); + iounmap(reset, MLX4_RESET_SIZE); + + /* Docs say to wait one second before accessing device */ + msleep(1000); + + end = jiffies + MLX4_RESET_TIMEOUT_JIFFIES; + do { + if (!pci_read_config_word(dev->pdev, PCI_VENDOR_ID, &vendor) && + vendor != 0xffff) + break; + + msleep(1); + } while (time_before(jiffies, end)); + + if (vendor == 0xffff) { + err = -ENODEV; + mlx4_err(dev, "PCI device did not come back after reset, " + "aborting.\n"); + goto out; + } + + /* Now restore the PCI headers */ + if (pcie_cap) { + devctl = hca_header[(pcie_cap + PCI_EXP_DEVCTL) / 4]; + if (pci_write_config_word(dev->pdev, pcie_cap + PCI_EXP_DEVCTL, + devctl)) { + err = -ENODEV; + mlx4_err(dev, "Couldn't restore HCA PCI Express " + "Device Control register, aborting.\n"); + goto out; + } + linkctl = hca_header[(pcie_cap + PCI_EXP_LNKCTL) / 4]; + if (pci_write_config_word(dev->pdev, pcie_cap + PCI_EXP_LNKCTL, + linkctl)) { + err = -ENODEV; + mlx4_err(dev, "Couldn't restore HCA PCI Express " + "Link control register, aborting.\n"); + goto out; + } + } + + for (i = 0; i < 
16; ++i) { + if (i * 4 == PCI_COMMAND) + continue; + + if (pci_write_config_dword(dev->pdev, i * 4, hca_header[i])) { + err = -ENODEV; + mlx4_err(dev, "Couldn't restore HCA reg %x, " + "aborting.\n", i); + goto out; + } + } + + if (pci_write_config_dword(dev->pdev, PCI_COMMAND, + hca_header[PCI_COMMAND / 4])) { + err = -ENODEV; + mlx4_err(dev, "Couldn't restore HCA COMMAND, " + "aborting.\n"); + goto out; + } + +out: + kfree(hca_header); + + return err; +} diff --git a/branches/ConnectX/hw/mlx4/kernel/net/srq.c b/branches/ConnectX/hw/mlx4/kernel/net/srq.c new file mode 100644 index 00000000..ca634475 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/net/srq.c @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#include "mlx4.h" +#include "icm.h" +#include "cmd.h" + +struct mlx4_srq_context { + __be32 state_logsize_srqn; + u8 logstride; + u8 reserved1[3]; + u8 pg_offset; + u8 reserved2[3]; + u32 reserved3; + u8 log_page_size; + u8 reserved4[2]; + u8 mtt_base_addr_h; + __be32 mtt_base_addr_l; + __be32 pd; + __be16 limit_watermark; + __be16 wqe_cnt; + u16 reserved5; + __be16 wqe_counter; + u32 reserved6; + __be64 db_rec_addr; +}; + +void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type) +{ + struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; + struct mlx4_srq *srq; + + spin_lock_dpc(&srq_table->lock); + + srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1)); + if (srq) + atomic_inc(&srq->refcount); + + spin_unlock_dpc(&srq_table->lock); + + if (!srq) { + mlx4_warn(dev, "Async event for bogus SRQ %08x\n", srqn); + return; + } + + srq->event(srq, event_type); + + if (atomic_dec_and_test(&srq->refcount)) + complete(&srq->free); +} + +static int mlx4_SW2HW_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, + int srq_num) +{ + return mlx4_cmd(dev, mailbox->dma.da, srq_num, 0, MLX4_CMD_SW2HW_SRQ, + MLX4_CMD_TIME_CLASS_A); +} + +static int mlx4_HW2SW_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, + int srq_num) +{ + return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma.da : 0, srq_num, + mailbox ? 
0 : 1, MLX4_CMD_HW2SW_SRQ, + MLX4_CMD_TIME_CLASS_A); +} + +static int mlx4_ARM_SRQ(struct mlx4_dev *dev, int srq_num, int limit_watermark) +{ + return mlx4_cmd(dev, limit_watermark, srq_num, 0, MLX4_CMD_ARM_SRQ, + MLX4_CMD_TIME_CLASS_B); +} + +static int mlx4_QUERY_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, + int srq_num) +{ + return mlx4_cmd_box(dev, 0, mailbox->dma.da, srq_num, 0, MLX4_CMD_QUERY_SRQ, + MLX4_CMD_TIME_CLASS_A); +} + +int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, struct mlx4_mtt *mtt, + u64 db_rec, struct mlx4_srq *srq) +{ + struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_srq_context *srq_context; + u64 mtt_addr; + int err; + + srq->srqn = mlx4_bitmap_alloc(&srq_table->bitmap); + if (srq->srqn == -1) + return -ENOMEM; + + err = mlx4_table_get(dev, &srq_table->table, srq->srqn); + if (err) + goto err_out; + + err = mlx4_table_get(dev, &srq_table->cmpt_table, srq->srqn); + if (err) + goto err_put; + + spin_lock_irq(&srq_table->lock); + err = radix_tree_insert(&srq_table->tree, srq->srqn, srq); + spin_unlock_irq(&srq_table->lock); + if (err) + goto err_cmpt_put; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) { + err = PTR_ERR(mailbox); + goto err_radix; + } + + srq_context = mailbox->buf; + memset(srq_context, 0, sizeof *srq_context); + + srq_context->state_logsize_srqn = cpu_to_be32((ilog2(srq->max) << 24) | + srq->srqn); + srq_context->logstride = (u8)(srq->wqe_shift - 4); + srq_context->log_page_size = (u8)(mtt->page_shift - MLX4_ICM_PAGE_SHIFT); + + mtt_addr = mlx4_mtt_addr(dev, mtt); + srq_context->mtt_base_addr_h = (u8)(mtt_addr >> 32); + srq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff); + srq_context->pd = cpu_to_be32(pdn); + srq_context->db_rec_addr = cpu_to_be64(db_rec); + + err = mlx4_SW2HW_SRQ(dev, mailbox, srq->srqn); + mlx4_free_cmd_mailbox(dev, mailbox); + if (err) + goto err_radix; + + atomic_set(&srq->refcount, 1); + init_completion(&srq->free); + + return 0; + +err_radix: + spin_lock_irq(&srq_table->lock); + radix_tree_delete(&srq_table->tree, srq->srqn); + spin_unlock_irq(&srq_table->lock); + +err_cmpt_put: + mlx4_table_put(dev, &srq_table->cmpt_table, srq->srqn); + +err_put: + mlx4_table_put(dev, &srq_table->table, srq->srqn); + +err_out: + mlx4_bitmap_free(&srq_table->bitmap, srq->srqn); + + return err; +} +EXPORT_SYMBOL_GPL(mlx4_srq_alloc); + +void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq) +{ + struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; + int err; + + err = mlx4_HW2SW_SRQ(dev, NULL, srq->srqn); + if (err) + mlx4_warn(dev, "HW2SW_SRQ failed (%d) for SRQN %06x\n", err, srq->srqn); + + spin_lock_irq(&srq_table->lock); + radix_tree_delete(&srq_table->tree, srq->srqn); + spin_unlock_irq(&srq_table->lock); + + if (atomic_dec_and_test(&srq->refcount)) + complete(&srq->free); + wait_for_completion(&srq->free); + + mlx4_table_put(dev, &srq_table->table, srq->srqn); + mlx4_bitmap_free(&srq_table->bitmap, srq->srqn); +} +EXPORT_SYMBOL_GPL(mlx4_srq_free); + +int mlx4_srq_arm(struct mlx4_dev *dev, struct mlx4_srq *srq, int limit_watermark) +{ + return mlx4_ARM_SRQ(dev, srq->srqn, limit_watermark); +} +EXPORT_SYMBOL_GPL(mlx4_srq_arm); + +int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq, int *limit_watermark) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_srq_context *srq_context; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + 
srq_context = mailbox->buf; + + err = mlx4_QUERY_SRQ(dev, mailbox, srq->srqn); + if (err) + goto err_out; + *limit_watermark = be16_to_cpu(srq_context->limit_watermark); + +err_out: + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_srq_query); + +int mlx4_init_srq_table(struct mlx4_dev *dev) +{ + struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; + int err; + + spin_lock_init(&srq_table->lock); + INIT_RADIX_TREE(&srq_table->tree, GFP_ATOMIC); + + err = mlx4_bitmap_init(&srq_table->bitmap, dev->caps.num_srqs, + dev->caps.num_srqs - 1, dev->caps.reserved_srqs); + if (err) + return err; + + return 0; +} + +void mlx4_cleanup_srq_table(struct mlx4_dev *dev) +{ + mlx4_bitmap_cleanup(&mlx4_priv(dev)->srq_table.bitmap); + radix_tree_destroy(&mlx4_priv(dev)->srq_table.tree); +} diff --git a/branches/ConnectX/hw/mlx4/kernel/vc.h b/branches/ConnectX/hw/mlx4/kernel/vc.h new file mode 100644 index 00000000..f7ccdd59 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel/vc.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 2004-2005 Mellanox Technologies, Inc. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: mthca_vc.h 1912 2007-01-17 11:08:02Z leonid $ + */ + +#pragma once + +#include + +typedef +struct _map_crspace { + unsigned __int64 va; /* address of CRSPACE, mapped to user space */ + unsigned long size; /* size of CRSPACE, mapped to user space */ + unsigned long reserved; /* to align on quadword boundary */ +} map_crspace; + +/* Definitions for hca_driver commands*/ +#define FW_READ 0x00 +#define FW_WRITE 0x01 +#define FW_READ_CMD 0x08 +#define FW_WRITE_CMD 0x09 +#define FW_MAP_CRSPACE 0x0A +#define FW_UNMAP_CRSPACE 0x0B +#define FW_OPEN_IF 0xe7 +#define FW_CLOSE_IF 0x7e + +/* uplink info */ +typedef struct { + uint8_t bus_type; /* 1 - PCI, 2 - PCI-X, 3 - PCI_E */ +#define UPLINK_BUS_PCI 1 +#define UPLINK_BUS_PCIX 2 +#define UPLINK_BUS_PCIE 3 + union { + struct { + uint8_t capabilities; +#define UPLINK_BUS_PCIX_133 2 /* 133 MHz capable */ + uint16_t frequency; /* in MHz */ + } pci_x; + struct { + uint8_t capabilities; + uint8_t link_speed; /* 1X link speed */ +#define UPLINK_BUS_PCIE_SDR 1 /* 2.5 Gbps */ +#define UPLINK_BUS_PCIE_DDR 2 /* 5 Gbps */ + uint8_t link_width; /* x1, x2, x4, x8, x12, x16, x32 */ + } pci_e; + } u; +} uplink_info_t; + + diff --git a/branches/ConnectX/hw/mlx4/kernel_patches/core_0020_csum.patch b/branches/ConnectX/hw/mlx4/kernel_patches/core_0020_csum.patch new file mode 100644 index 00000000..9b21e568 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel_patches/core_0020_csum.patch @@ -0,0 +1,51 @@ +From 11d392f57b2199f5c8071360ebf03c2fc6c4afb2 Mon Sep 17 00:00:00 2001 +From: Eli Cohen +Date: Tue, 15 Jan 2008 12:15:59 +0200 +Subject: [PATCH] Add checksum support to ib core + +Signed-off-by: Eli Cohen +--- + include/rdma/ib_verbs.h | 13 +++++++++++-- + 1 files changed, 11 insertions(+), 2 deletions(-) + +diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h +index 11f3960..e35cc29 100644 +--- a/include/rdma/ib_verbs.h ++++ b/include/rdma/ib_verbs.h +@@ -95,7 +95,14 @@ enum ib_device_cap_flags { + IB_DEVICE_N_NOTIFY_CQ = (1<<14), + IB_DEVICE_ZERO_STAG = (1<<15), + IB_DEVICE_SEND_W_INV = (1<<16), +- IB_DEVICE_MEM_WINDOW = (1<<17) ++ IB_DEVICE_MEM_WINDOW = (1<<17), ++ /* ++ * devices which publish this capability must support insertion of UDP ++ * and TCP checksum on outgoing packets and can verify the validity of ++ * checksum for incoming packets. Setting this flag implies the driver ++ * may set NETIF_F_IP_CSUM. ++ */ ++ IB_DEVICE_IP_CSUM = (1<<18), + }; + + enum ib_atomic_cap { +@@ -431,6 +438,7 @@ struct ib_wc { + u8 sl; + u8 dlid_path_bits; + u8 port_num; /* valid only for DR SMPs on switches */ ++ int csum_ok; + }; + + enum ib_cq_notify_flags { +@@ -615,7 +623,8 @@ enum ib_send_flags { + IB_SEND_FENCE = 1, + IB_SEND_SIGNALED = (1<<1), + IB_SEND_SOLICITED = (1<<2), +- IB_SEND_INLINE = (1<<3) ++ IB_SEND_INLINE = (1<<3), ++ IB_SEND_IP_CSUM = (1<<4) + }; + + struct ib_sge { +-- +1.5.3.8 + diff --git a/branches/ConnectX/hw/mlx4/kernel_patches/core_0025_qp_create_flags.patch b/branches/ConnectX/hw/mlx4/kernel_patches/core_0025_qp_create_flags.patch new file mode 100644 index 00000000..d5fff8ba --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel_patches/core_0025_qp_create_flags.patch @@ -0,0 +1,53 @@ +From a1d2b448a530a829c2ae3a896c0f2d3adc90a069 Mon Sep 17 00:00:00 2001 +From: Eli Cohen +Date: Tue, 15 Jan 2008 15:42:31 +0200 +Subject: [PATCH] Add creation flags to QPs + +This will allow a kernel verbs consumer to create a QP +and pass special flags to the hw layer. This patch also +defines one such flag for LSO support. 
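For illustration, a kernel verbs consumer that wants such a QP only has to fill the new field before calling ib_create_qp(). This is a minimal sketch, assuming a kernel with this patch applied; pd, send_cq and recv_cq stand for objects the consumer already owns:

    /* Request a UD QP with LSO enabled via the new create_flags field. */
    struct ib_qp_init_attr init_attr;
    struct ib_qp *qp;

    memset(&init_attr, 0, sizeof init_attr);
    init_attr.send_cq          = send_cq;       /* existing CQs (placeholders) */
    init_attr.recv_cq          = recv_cq;
    init_attr.qp_type          = IB_QPT_UD;
    init_attr.sq_sig_type      = IB_SIGNAL_ALL_WR;
    init_attr.cap.max_send_wr  = 64;
    init_attr.cap.max_recv_wr  = 64;
    init_attr.cap.max_send_sge = 1;
    init_attr.cap.max_recv_sge = 1;
    init_attr.create_flags     = QP_CREATE_LSO; /* the flag introduced by this patch */

    qp = ib_create_qp(pd, &init_attr);          /* pd: the consumer's ib_pd */
    if (IS_ERR(qp))
        return PTR_ERR(qp);

Note that the uverbs path below forces create_flags to 0, so the new flag is usable by kernel consumers only.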
+ +Signed-off-by: Eli Cohen +--- + drivers/infiniband/core/uverbs_cmd.c | 1 + + include/rdma/ib_verbs.h | 5 +++++ + 2 files changed, 6 insertions(+), 0 deletions(-) + +diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c +index 495c803..9e98cec 100644 +--- a/drivers/infiniband/core/uverbs_cmd.c ++++ b/drivers/infiniband/core/uverbs_cmd.c +@@ -1065,6 +1065,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, + attr.srq = srq; + attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; + attr.qp_type = cmd.qp_type; ++ attr.create_flags = 0; + + attr.cap.max_send_wr = cmd.max_send_wr; + attr.cap.max_recv_wr = cmd.max_recv_wr; +diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h +index e35cc29..a4f6184 100644 +--- a/include/rdma/ib_verbs.h ++++ b/include/rdma/ib_verbs.h +@@ -494,6 +494,10 @@ enum ib_qp_type { + IB_QPT_RAW_ETY + }; + ++enum qp_create_flags { ++ QP_CREATE_LSO = 1 << 0, ++}; ++ + struct ib_qp_init_attr { + void (*event_handler)(struct ib_event *, void *); + void *qp_context; +@@ -504,6 +508,7 @@ struct ib_qp_init_attr { + enum ib_sig_type sq_sig_type; + enum ib_qp_type qp_type; + u8 port_num; /* special QP types only */ ++ enum qp_create_flags create_flags; + }; + + enum ib_rnr_timeout { +-- +1.5.3.8 + diff --git a/branches/ConnectX/hw/mlx4/kernel_patches/core_0030_lso.patch b/branches/ConnectX/hw/mlx4/kernel_patches/core_0030_lso.patch new file mode 100644 index 00000000..16a8e08c --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel_patches/core_0030_lso.patch @@ -0,0 +1,66 @@ +From 86a166b61efd6c040bd6d508a4179e3e15827ac0 Mon Sep 17 00:00:00 2001 +From: Eli Cohen +Date: Tue, 15 Jan 2008 15:48:20 +0200 +Subject: [PATCH] Add core support for LSO + +LSO allows to pass to the network driver SKBs with data larger +than MTU and let the HW fragment the packet to mss quantities. + +Signed-off-by: Eli Cohen +--- + include/rdma/ib_verbs.h | 11 +++++++++-- + 1 files changed, 9 insertions(+), 2 deletions(-) + +diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h +index a4f6184..6ef1729 100644 +--- a/include/rdma/ib_verbs.h ++++ b/include/rdma/ib_verbs.h +@@ -103,6 +103,7 @@ enum ib_device_cap_flags { + * may set NETIF_F_IP_CSUM. + */ + IB_DEVICE_IP_CSUM = (1<<18), ++ IB_DEVICE_TCP_TSO = (1<<19), + }; + + enum ib_atomic_cap { +@@ -410,6 +411,7 @@ enum ib_wc_opcode { + IB_WC_COMP_SWAP, + IB_WC_FETCH_ADD, + IB_WC_BIND_MW, ++ IB_WC_LSO, + /* + * Set value of IB_WC_RECV so consumers can test if a completion is a + * receive by testing (opcode & IB_WC_RECV). 
+@@ -621,7 +623,8 @@ enum ib_wr_opcode { + IB_WR_SEND_WITH_IMM, + IB_WR_RDMA_READ, + IB_WR_ATOMIC_CMP_AND_SWP, +- IB_WR_ATOMIC_FETCH_AND_ADD ++ IB_WR_ATOMIC_FETCH_AND_ADD, ++ IB_WR_LSO + }; + + enum ib_send_flags { +@@ -629,7 +632,8 @@ enum ib_send_flags { + IB_SEND_SIGNALED = (1<<1), + IB_SEND_SOLICITED = (1<<2), + IB_SEND_INLINE = (1<<3), +- IB_SEND_IP_CSUM = (1<<4) ++ IB_SEND_IP_CSUM = (1<<4), ++ IB_SEND_UDP_LSO = (1<<5) + }; + + struct ib_sge { +@@ -659,6 +663,9 @@ struct ib_send_wr { + } atomic; + struct { + struct ib_ah *ah; ++ void *header; ++ int hlen; ++ int mss; + u32 remote_qpn; + u32 remote_qkey; + u16 pkey_index; /* valid for GSI only */ +-- +1.5.3.8 + diff --git a/branches/ConnectX/hw/mlx4/kernel_patches/mlx4_0010_add_wc.patch b/branches/ConnectX/hw/mlx4/kernel_patches/mlx4_0010_add_wc.patch new file mode 100644 index 00000000..767bb269 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel_patches/mlx4_0010_add_wc.patch @@ -0,0 +1,312 @@ +IB/mlx4: set write-combining flag for userspace blueflame pages + +Supported on i386 and x86_64 for now. + +Signed-off-by: Michael S. Tsirkin + +diff --git a/drivers/infiniband/hw/mlx4/Makefile b/drivers/infiniband/hw/mlx4/Makefile +index 70f09c7..ce885a8 100644 +--- a/drivers/infiniband/hw/mlx4/Makefile ++++ b/drivers/infiniband/hw/mlx4/Makefile +@@ -1,3 +1,4 @@ + obj-$(CONFIG_MLX4_INFINIBAND) += mlx4_ib.o + + mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o ++mlx4_ib-y += wc.o +diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c +index 5128d95..f60a3cd 100644 +--- a/drivers/infiniband/hw/mlx4/main.c ++++ b/drivers/infiniband/hw/mlx4/main.c +@@ -42,6 +42,7 @@ + + #include "mlx4_ib.h" + #include "user.h" ++#include "wc.h" + + #define DRV_NAME "mlx4_ib" + #define DRV_VERSION "0.01" +@@ -375,7 +376,7 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) + return -EAGAIN; + } else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) { + /* FIXME want pgprot_writecombine() for BlueFlame pages */ +- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); ++ vma->vm_page_prot = pgprot_wc(vma->vm_page_prot); + + if (io_remap_pfn_range(vma, vma->vm_start, + to_mucontext(context)->uar.pfn + +@@ -611,12 +612,14 @@ static struct mlx4_interface mlx4_ib_interface = { + + static int __init mlx4_ib_init(void) + { ++ mlx4_enable_wc(); + return mlx4_register_interface(&mlx4_ib_interface); + } + + static void __exit mlx4_ib_cleanup(void) + { + mlx4_unregister_interface(&mlx4_ib_interface); ++ mlx4_disable_wc(); + } + + module_init(mlx4_ib_init); +diff --git a/drivers/infiniband/hw/mlx4/wc.c b/drivers/infiniband/hw/mlx4/wc.c +new file mode 100644 +index 0000000..3747ab1 +--- /dev/null ++++ b/drivers/infiniband/hw/mlx4/wc.c +@@ -0,0 +1,206 @@ ++/* ++ * Copyright (c) 2006-2007 Mellanox Technologies. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. 
++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++ ++#include ++#include "wc.h" ++ ++static u32 old_pat_lo[NR_CPUS] = {0}; ++static u32 old_pat_hi[NR_CPUS] = {0}; ++static unsigned int wc_enabled = 0; ++ ++#define MLX4_PAT_MASK (0xFFFFF8FF) ++#define MLX4_PAT_MOD (0x00000100) ++#define MLX4_WC_FLAGS (_PAGE_PWT) ++ ++#if defined(__i386__) || defined(__x86_64__) ++ ++#define X86_MSR_PAT_OFFSET 0x277 ++ ++/* Returns non-zero if we have a chipset write-combining problem */ ++static int have_wc_errata(void) ++{ ++ struct pci_dev *dev; ++ u8 rev; ++ ++ if ((dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL)) != NULL) { ++ /* ++ * ServerWorks LE chipsets < rev 6 have problems with ++ * write-combining. ++ */ ++ if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS && ++ dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) { ++ pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); ++ if (rev <= 5) { ++ printk(KERN_INFO "ib_mlx4: Serverworks LE rev < 6" ++ " detected. Write-combining disabled.\n"); ++ pci_dev_put(dev); ++ return -ENOSYS; ++ } ++ } ++ /* Intel 450NX errata # 23. Non ascending cacheline evictions ++ to write combining memory may resulting in data corruption */ ++ if (dev->vendor == PCI_VENDOR_ID_INTEL && ++ dev->device == PCI_DEVICE_ID_INTEL_82451NX) { ++ printk(KERN_INFO "ib_mlx4: Intel 450NX MMC detected." 
++ " Write-combining disabled.\n"); ++ pci_dev_put(dev); ++ return -ENOSYS; ++ } ++ pci_dev_put(dev); ++ } ++ return 0; ++} ++ ++static void rd_old_pat(void *err) ++{ ++ *(int *)err |= rdmsr_safe(X86_MSR_PAT_OFFSET, ++ &old_pat_lo[smp_processor_id()], ++ &old_pat_hi[smp_processor_id()]); ++} ++ ++static void wr_new_pat(void *err) ++{ ++ u32 new_pat_lo = (old_pat_lo[smp_processor_id()] & MLX4_PAT_MASK) | ++ MLX4_PAT_MOD; ++ ++ *(int *)err |= wrmsr_safe(X86_MSR_PAT_OFFSET, ++ new_pat_lo, ++ old_pat_hi[smp_processor_id()]); ++} ++ ++static void wr_old_pat(void *err) ++{ ++ *(int *)err |= wrmsr_safe(X86_MSR_PAT_OFFSET, ++ old_pat_lo[smp_processor_id()], ++ old_pat_hi[smp_processor_id()]); ++} ++ ++static int read_and_modify_pat(void) ++{ ++ int ret = 0; ++ ++ preempt_disable(); ++ rd_old_pat(&ret); ++ if (!ret) ++ smp_call_function(rd_old_pat, &ret, 1, 1); ++ if (ret) ++ goto out; ++ ++ wr_new_pat(&ret); ++ if (ret) ++ goto out; ++ ++ smp_call_function(wr_new_pat, &ret, 1, 1); ++ BUG_ON(ret); /* have inconsistent PAT state */ ++out: ++ preempt_enable(); ++ return ret; ++} ++ ++static int restore_pat(void) ++{ ++ int ret = 0; ++ ++ preempt_disable(); ++ wr_old_pat(&ret); ++ if (!ret) { ++ smp_call_function(wr_old_pat, &ret, 1, 1); ++ BUG_ON(ret); /* have inconsistent PAT state */ ++ } ++ ++ preempt_enable(); ++ return ret; ++} ++ ++int mlx4_enable_wc(void) ++{ ++ struct cpuinfo_x86 *c = &cpu_data(0); ++ int ret; ++ ++ if (wc_enabled) ++ return 0; ++ ++ if (!cpu_has(c, X86_FEATURE_MSR) || ++ !cpu_has(c, X86_FEATURE_PAT)) { ++ printk(KERN_INFO "ib_mlx4: WC not available" ++ " on this processor\n"); ++ return -ENOSYS; ++ } ++ ++ if (have_wc_errata()) ++ return -ENOSYS; ++ ++ if (!(ret = read_and_modify_pat())) ++ wc_enabled = 1; ++ else ++ printk(KERN_INFO "ib_mlx4: failed to enable WC\n"); ++ return ret ? -EIO : 0; ++} ++ ++void mlx4_disable_wc(void) ++{ ++ if (wc_enabled) { ++ if (!restore_pat()) ++ wc_enabled = 0; ++ else ++ printk(KERN_INFO "ib_mlx4: failed to disable WC\n"); ++ } ++} ++ ++pgprot_t pgprot_wc(pgprot_t _prot) ++{ ++ return wc_enabled ? __pgprot(pgprot_val(_prot) | MLX4_WC_FLAGS) : ++ pgprot_noncached(_prot); ++} ++ ++int mlx4_wc_enabled(void) ++{ ++ return wc_enabled; ++} ++ ++#else /* !(defined(__i386__) || defined(__x86_64__)) */ ++ ++int mlx4_enable_wc(void){ return 0; } ++void mlx4_disable_wc(void){} ++ ++pgprot_t pgprot_wc(pgprot_t _prot) ++{ ++ return pgprot_noncached(_prot); ++} ++ ++int mlx4_wc_enabled(void) ++{ ++ return 0; ++} ++ ++#endif ++ +diff --git a/drivers/infiniband/hw/mlx4/wc.h b/drivers/infiniband/hw/mlx4/wc.h +new file mode 100644 +index 0000000..70b891d +--- /dev/null ++++ b/drivers/infiniband/hw/mlx4/wc.h +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (c) 2006-2007 Mellanox Technologies. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. 
++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++ ++#ifndef mlx4_WC_H ++#define mlx4_WC_H ++ ++#include ++ ++int mlx4_enable_wc(void); ++void mlx4_disable_wc(void); ++int mlx4_wc_enabled(void); ++pgprot_t pgprot_wc(pgprot_t _prot); ++ ++#endif diff --git a/branches/ConnectX/hw/mlx4/kernel_patches/mlx4_0030_checksum_offload.patch b/branches/ConnectX/hw/mlx4/kernel_patches/mlx4_0030_checksum_offload.patch new file mode 100644 index 00000000..2df85b1d --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel_patches/mlx4_0030_checksum_offload.patch @@ -0,0 +1,125 @@ +From cb0f57646824cc986000cc2b8e36cf306f4cda18 Mon Sep 17 00:00:00 2001 +From: Eli Cohen +Date: Tue, 15 Jan 2008 14:47:39 +0200 +Subject: [PATCH] Add checksum offload support to mlx4 + +Signed-off-by: Eli Cohen +Signed-off-by: Ali Ayub +--- + drivers/infiniband/hw/mlx4/cq.c | 2 ++ + drivers/infiniband/hw/mlx4/main.c | 5 +++++ + drivers/infiniband/hw/mlx4/qp.c | 3 +++ + drivers/net/mlx4/fw.c | 3 +++ + include/linux/mlx4/cq.h | 4 ++-- + include/linux/mlx4/qp.h | 2 ++ + 6 files changed, 17 insertions(+), 2 deletions(-) + +Index: ofed_kernel/drivers/infiniband/hw/mlx4/cq.c +=================================================================== +--- ofed_kernel.orig/drivers/infiniband/hw/mlx4/cq.c 2008-01-24 12:01:00.000000000 +0200 ++++ ofed_kernel/drivers/infiniband/hw/mlx4/cq.c 2008-01-24 12:09:24.000000000 +0200 +@@ -314,6 +314,11 @@ static int mlx4_ib_poll_one(struct mlx4_ + int is_send; + int is_error; + u16 wqe_ctr; ++ __be32 status; ++ ++#define CSUM_MASK_BITS cpu_to_be32(0x13c00000) ++#define CSUM_VAL_BITS cpu_to_be32(0x10400000) ++#define CSUM_MASK2_BITS cpu_to_be32(0x0c000000) + + cqe = next_cqe_sw(cq); + if (!cqe) +@@ -431,6 +436,10 @@ static int mlx4_ib_poll_one(struct mlx4_ + wc->wc_flags |= be32_to_cpu(cqe->g_mlpath_rqpn) & 0x80000000 ? 
+ IB_WC_GRH : 0; + wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f; ++ status = cqe->ipoib_status; ++ wc->csum_ok = (status & CSUM_MASK_BITS) == CSUM_VAL_BITS && ++ (status & CSUM_MASK2_BITS) && ++ cqe->checksum == 0xffff; + } + + return 0; +Index: ofed_kernel/drivers/infiniband/hw/mlx4/main.c +=================================================================== +--- ofed_kernel.orig/drivers/infiniband/hw/mlx4/main.c 2008-01-24 12:01:17.000000000 +0200 ++++ ofed_kernel/drivers/infiniband/hw/mlx4/main.c 2008-01-24 12:03:18.000000000 +0200 +@@ -100,6 +100,8 @@ static int mlx4_ib_query_device(struct i + props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT) + props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE; ++ if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM) ++ props->device_cap_flags |= IB_DEVICE_IP_CSUM; + + props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & + 0xffffff; +@@ -613,6 +615,9 @@ static void *mlx4_ib_add(struct mlx4_dev + ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr; + ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc; + ++ if (ibdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM) ++ ibdev->ib_dev.flags |= IB_DEVICE_IP_CSUM; ++ + if (init_node_data(ibdev)) + goto err_map; + +Index: ofed_kernel/drivers/infiniband/hw/mlx4/qp.c +=================================================================== +--- ofed_kernel.orig/drivers/infiniband/hw/mlx4/qp.c 2008-01-24 12:01:00.000000000 +0200 ++++ ofed_kernel/drivers/infiniband/hw/mlx4/qp.c 2008-01-24 12:03:18.000000000 +0200 +@@ -1307,6 +1307,9 @@ int mlx4_ib_post_send(struct ib_qp *ibqp + cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) | + (wr->send_flags & IB_SEND_SOLICITED ? + cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0) | ++ ((wr->send_flags & IB_SEND_IP_CSUM) ? 
++ cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM | ++ MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) | + qp->sq_signal_bits; + + if (wr->opcode == IB_WR_SEND_WITH_IMM || +Index: ofed_kernel/drivers/net/mlx4/fw.c +=================================================================== +--- ofed_kernel.orig/drivers/net/mlx4/fw.c 2008-01-24 12:01:17.000000000 +0200 ++++ ofed_kernel/drivers/net/mlx4/fw.c 2008-01-24 12:03:18.000000000 +0200 +@@ -741,6 +741,9 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, + MLX4_PUT(inbox, (u8) (PAGE_SHIFT - 12), INIT_HCA_UAR_PAGE_SZ_OFFSET); + MLX4_PUT(inbox, param->log_uar_sz, INIT_HCA_LOG_UAR_SZ_OFFSET); + ++ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM) ++ *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 3); ++ + err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, 10000); + + if (err) +Index: ofed_kernel/include/linux/mlx4/cq.h +=================================================================== +--- ofed_kernel.orig/include/linux/mlx4/cq.h 2008-01-24 12:01:00.000000000 +0200 ++++ ofed_kernel/include/linux/mlx4/cq.h 2008-01-24 12:03:18.000000000 +0200 +@@ -45,11 +45,11 @@ struct mlx4_cqe { + u8 sl; + u8 reserved1; + __be16 rlid; +- u32 reserved2; ++ __be32 ipoib_status; + __be32 byte_cnt; + __be16 wqe_index; + __be16 checksum; +- u8 reserved3[3]; ++ u8 reserved2[3]; + u8 owner_sr_opcode; + }; + +Index: ofed_kernel/include/linux/mlx4/qp.h +=================================================================== +--- ofed_kernel.orig/include/linux/mlx4/qp.h 2008-01-24 12:01:00.000000000 +0200 ++++ ofed_kernel/include/linux/mlx4/qp.h 2008-01-24 12:03:18.000000000 +0200 +@@ -158,6 +158,8 @@ enum { + MLX4_WQE_CTRL_FENCE = 1 << 6, + MLX4_WQE_CTRL_CQ_UPDATE = 3 << 2, + MLX4_WQE_CTRL_SOLICITED = 1 << 1, ++ MLX4_WQE_CTRL_IP_CSUM = 1 << 4, ++ MLX4_WQE_CTRL_TCP_UDP_CSUM = 1 << 5, + }; + + struct mlx4_wqe_ctrl_seg { diff --git a/branches/ConnectX/hw/mlx4/kernel_patches/mlx4_0045_qp_flags.patch b/branches/ConnectX/hw/mlx4/kernel_patches/mlx4_0045_qp_flags.patch new file mode 100644 index 00000000..835b2911 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel_patches/mlx4_0045_qp_flags.patch @@ -0,0 +1,76 @@ +mlx4: Add creation flags to mlx4 QPs + +The core passes creation flags and mlx4 saves them for later +reference. + +rev 2: +changed flags field to int +moved setting flags to qp_create_common. 
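The effect of the hunks below is a one-time translation of the core-level creation flag into the driver's private per-QP flag, so the rest of mlx4 only ever tests qp->flags. Schematically (the first check is from the create_qp_common hunk below; the second is from the LSO patch that follows):

    /* create_qp_common(): remember the request on the QP itself */
    if (init_attr->create_flags & QP_CREATE_LSO)
        qp->flags |= MLX4_QP_LSO;

    /* later code checks only the private flag, e.g. to reserve room
     * in the send WQE for the inline LSO header segment */
    if (qp->flags & MLX4_QP_LSO)
        reserve = 64;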
+ +Signed-off-by: Eli Cohen +Signed-off-by: Jack Morgenstein + +--- + +Index: ofed_kernel/drivers/infiniband/hw/mlx4/mlx4_ib.h +=================================================================== +--- ofed_kernel.orig/drivers/infiniband/hw/mlx4/mlx4_ib.h 2008-01-23 13:15:31.282457000 +0200 ++++ ofed_kernel/drivers/infiniband/hw/mlx4/mlx4_ib.h 2008-01-23 15:58:48.546092000 +0200 +@@ -110,6 +110,10 @@ struct mlx4_ib_wq { + unsigned tail; + }; + ++enum qp_flags { ++ MLX4_QP_LSO = 1 << 0 ++}; ++ + struct mlx4_ib_qp { + struct ib_qp ibqp; + struct mlx4_qp mqp; +@@ -133,6 +137,7 @@ struct mlx4_ib_qp { + u8 resp_depth; + u8 sq_no_prefetch; + u8 state; ++ int flags; + }; + + struct mlx4_ib_srq { +Index: ofed_kernel/drivers/infiniband/hw/mlx4/qp.c +=================================================================== +--- ofed_kernel.orig/drivers/infiniband/hw/mlx4/qp.c 2008-01-23 13:15:31.287456000 +0200 ++++ ofed_kernel/drivers/infiniband/hw/mlx4/qp.c 2008-01-23 16:00:38.734097000 +0200 +@@ -238,9 +238,13 @@ static int set_rq_size(struct mlx4_ib_de + return 0; + } + +-static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, +- enum ib_qp_type type, struct mlx4_ib_qp *qp) ++static int set_kernel_sq_size(struct mlx4_ib_dev *dev, ++ struct ib_qp_init_attr *init_attr, ++ struct mlx4_ib_qp *qp) + { ++ struct ib_qp_cap *cap = &init_attr->cap; ++ enum ib_qp_type type = init_attr->qp_type; ++ + /* Sanity check SQ size before proceeding */ + if (cap->max_send_wr > dev->dev->caps.max_wqes || + cap->max_send_sge > dev->dev->caps.max_sq_sg || +@@ -328,6 +332,9 @@ static int create_qp_common(struct mlx4_ + qp->sq.head = 0; + qp->sq.tail = 0; + ++ if (init_attr->create_flags & QP_CREATE_LSO) ++ qp->flags |= MLX4_QP_LSO; ++ + err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, !!init_attr->srq, qp); + if (err) + goto err; +@@ -371,7 +378,7 @@ static int create_qp_common(struct mlx4_ + } else { + qp->sq_no_prefetch = 0; + +- err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp); ++ err = set_kernel_sq_size(dev, init_attr, qp); + if (err) + goto err; + diff --git a/branches/ConnectX/hw/mlx4/kernel_patches/mlx4_0050_lso.patch b/branches/ConnectX/hw/mlx4/kernel_patches/mlx4_0050_lso.patch new file mode 100644 index 00000000..f84b686d --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel_patches/mlx4_0050_lso.patch @@ -0,0 +1,249 @@ +From 33c5e1a802583cd84b55a4c5270e9d7753ac29bf Mon Sep 17 00:00:00 2001 +From: Eli Cohen +Date: Tue, 15 Jan 2008 18:57:09 +0200 +Subject: [PATCH] Add LSO support to mlx4 + +mlx4: Add LSO support. + +Changes: +Adjusted setting "reserve" value in set_kernel_sq_size to fit changes +in qp_flags patch. 
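Combined with the core LSO patch above, a kernel consumer can then post a send whose payload is larger than the path MTU and let the HCA do the segmentation. A rough sketch of such a work request follows; ah, mr, payload_dma, remote_qpn, remote_qkey, ipoib_header, header_len and mss are placeholders for values the consumer already has:

    struct ib_sge sge;
    struct ib_send_wr wr, *bad_wr;
    int err;

    memset(&wr, 0, sizeof wr);
    sge.addr   = payload_dma;            /* DMA address of the large payload */
    sge.length = 16 * 1024;              /* deliberately larger than the path MTU */
    sge.lkey   = mr->lkey;

    wr.opcode            = IB_WR_LSO;    /* new opcode from the core LSO patch */
    wr.send_flags        = IB_SEND_SIGNALED;
    wr.sg_list           = &sge;
    wr.num_sge           = 1;
    wr.wr.ud.ah          = ah;
    wr.wr.ud.remote_qpn  = remote_qpn;
    wr.wr.ud.remote_qkey = remote_qkey;
    wr.wr.ud.header      = ipoib_header; /* prebuilt packet header */
    wr.wr.ud.hlen        = header_len;   /* copied inline by build_lso_seg() below */
    wr.wr.ud.mss         = mss;          /* HW emits one packet per mss bytes */

    err = ib_post_send(qp, &wr, &bad_wr);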
+ +Signed-off-by: Eli Cohen +Signed-off-by: Jack Morgenstein + +--- + drivers/infiniband/hw/mlx4/cq.c | 3 ++ + drivers/infiniband/hw/mlx4/main.c | 4 +++ + drivers/infiniband/hw/mlx4/qp.c | 52 +++++++++++++++++++++++++++++++++--- + drivers/net/mlx4/fw.c | 9 ++++++ + drivers/net/mlx4/fw.h | 1 + + drivers/net/mlx4/main.c | 1 + + include/linux/mlx4/device.h | 1 + + include/linux/mlx4/qp.h | 5 +++ + 8 files changed, 71 insertions(+), 5 deletions(-) + +Index: ofed_kernel/drivers/infiniband/hw/mlx4/cq.c +=================================================================== +--- ofed_kernel.orig/drivers/infiniband/hw/mlx4/cq.c 2008-01-23 16:01:48.392614000 +0200 ++++ ofed_kernel/drivers/infiniband/hw/mlx4/cq.c 2008-01-23 16:05:20.076983000 +0200 +@@ -408,6 +408,9 @@ static int mlx4_ib_poll_one(struct mlx4_ + case MLX4_OPCODE_BIND_MW: + wc->opcode = IB_WC_BIND_MW; + break; ++ case MLX4_OPCODE_LSO: ++ wc->opcode = IB_WC_LSO; ++ break; + } + } else { + wc->byte_len = be32_to_cpu(cqe->byte_cnt); +Index: ofed_kernel/drivers/infiniband/hw/mlx4/main.c +=================================================================== +--- ofed_kernel.orig/drivers/infiniband/hw/mlx4/main.c 2008-01-23 16:01:48.398613000 +0200 ++++ ofed_kernel/drivers/infiniband/hw/mlx4/main.c 2008-01-23 16:05:20.081982000 +0200 +@@ -102,6 +102,8 @@ static int mlx4_ib_query_device(struct i + props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE; + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM) + props->device_cap_flags |= IB_DEVICE_IP_CSUM; ++ if (dev->dev->caps.max_gso_sz) ++ props->device_cap_flags |= IB_DEVICE_TCP_TSO; + + props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & + 0xffffff; +@@ -617,6 +619,8 @@ static void *mlx4_ib_add(struct mlx4_dev + + if (ibdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM) + ibdev->ib_dev.flags |= IB_DEVICE_IP_CSUM; ++ if (ibdev->dev->caps.max_gso_sz) ++ ibdev->ib_dev.flags |= IB_DEVICE_TCP_TSO; + + if (init_node_data(ibdev)) + goto err_map; +Index: ofed_kernel/drivers/infiniband/hw/mlx4/qp.c +=================================================================== +--- ofed_kernel.orig/drivers/infiniband/hw/mlx4/qp.c 2008-01-23 16:01:51.101506000 +0200 ++++ ofed_kernel/drivers/infiniband/hw/mlx4/qp.c 2008-01-23 16:08:04.078114000 +0200 +@@ -69,6 +69,7 @@ enum { + + static const __be32 mlx4_ib_opcode[] = { + [IB_WR_SEND] = __constant_cpu_to_be32(MLX4_OPCODE_SEND), ++ [IB_WR_LSO] = __constant_cpu_to_be32(MLX4_OPCODE_LSO), + [IB_WR_SEND_WITH_IMM] = __constant_cpu_to_be32(MLX4_OPCODE_SEND_IMM), + [IB_WR_RDMA_WRITE] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE), + [IB_WR_RDMA_WRITE_WITH_IMM] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM), +@@ -244,6 +245,7 @@ static int set_kernel_sq_size(struct mlx + { + struct ib_qp_cap *cap = &init_attr->cap; + enum ib_qp_type type = init_attr->qp_type; ++ int reserve = 0; + + /* Sanity check SQ size before proceeding */ + if (cap->max_send_wr > dev->dev->caps.max_wqes || +@@ -260,12 +262,16 @@ static int set_kernel_sq_size(struct mlx + cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg) + return -EINVAL; + ++ if (qp->flags & MLX4_QP_LSO) ++ reserve = 64; ++ + qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge * +- sizeof (struct mlx4_wqe_data_seg), ++ sizeof (struct mlx4_wqe_data_seg) + ++ reserve, + cap->max_inline_data + + sizeof (struct mlx4_wqe_inline_seg)) + + send_wqe_overhead(type))); +- qp->sq.max_gs = ((1 << qp->sq.wqe_shift) - send_wqe_overhead(type)) / ++ qp->sq.max_gs = ((1 << qp->sq.wqe_shift) -reserve - 
send_wqe_overhead(type)) / + sizeof (struct mlx4_wqe_data_seg); + + /* +@@ -756,9 +764,11 @@ static int __mlx4_ib_modify_qp(struct ib + } + } + +- if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI || +- ibqp->qp_type == IB_QPT_UD) ++ if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) + context->mtu_msgmax = (IB_MTU_4096 << 5) | 11; ++ else if (ibqp->qp_type == IB_QPT_UD) ++ context->mtu_msgmax = (IB_MTU_4096 << 5) | ++ ilog2(dev->dev->caps.max_gso_sz); + else if (attr_mask & IB_QP_PATH_MTU) { + if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) { + printk(KERN_ERR "path MTU (%u) is invalid\n", +@@ -1276,6 +1286,28 @@ static void __set_data_seg(struct mlx4_w + dseg->addr = cpu_to_be64(sg->addr); + } + ++static int build_lso_seg(struct mlx4_lso_seg *wqe, struct ib_send_wr *wr, ++ struct mlx4_ib_qp *qp, int *lso_seg_len) ++{ ++ int halign; ++ ++ halign = ALIGN(wr->wr.ud.hlen, 16); ++ if (unlikely(!(qp->flags & MLX4_QP_LSO) && wr->num_sge > qp->sq.max_gs - (halign >> 4))) ++ return -EINVAL; ++ ++ memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen); ++ ++ /* make sure LSO header is written before ++ overwriting stamping */ ++ wmb(); ++ ++ wqe->mss_hdr_size = cpu_to_be32(((wr->wr.ud.mss - wr->wr.ud.hlen) ++ << 16) | wr->wr.ud.hlen); ++ ++ *lso_seg_len = halign; ++ return 0; ++} ++ + int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) + { +@@ -1366,6 +1398,19 @@ int mlx4_ib_post_send(struct ib_qp *ibqp + set_datagram_seg(wqe, wr); + wqe += sizeof (struct mlx4_wqe_datagram_seg); + size += sizeof (struct mlx4_wqe_datagram_seg) / 16; ++ ++ if (wr->opcode == IB_WR_LSO) { ++ int hlen; ++ ++ err = build_lso_seg(wqe, wr, qp, &hlen); ++ if (err) { ++ *bad_wr = wr; ++ goto out; ++ } ++ wqe += hlen; ++ size += hlen >> 4; ++ } ++ + break; + + case IB_QPT_SMI: +Index: ofed_kernel/drivers/net/mlx4/fw.c +=================================================================== +--- ofed_kernel.orig/drivers/net/mlx4/fw.c 2008-01-23 16:01:48.430615000 +0200 ++++ ofed_kernel/drivers/net/mlx4/fw.c 2008-01-23 16:05:20.106981000 +0200 +@@ -133,6 +133,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev * + #define QUERY_DEV_CAP_MAX_AV_OFFSET 0x27 + #define QUERY_DEV_CAP_MAX_REQ_QP_OFFSET 0x29 + #define QUERY_DEV_CAP_MAX_RES_QP_OFFSET 0x2b ++#define QUERY_DEV_CAP_MAX_GSO_OFFSET 0x2d + #define QUERY_DEV_CAP_MAX_RDMA_OFFSET 0x2f + #define QUERY_DEV_CAP_RSZ_SRQ_OFFSET 0x33 + #define QUERY_DEV_CAP_ACK_DELAY_OFFSET 0x35 +@@ -215,6 +216,13 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev * + dev_cap->max_requester_per_qp = 1 << (field & 0x3f); + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RES_QP_OFFSET); + dev_cap->max_responder_per_qp = 1 << (field & 0x3f); ++ MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_GSO_OFFSET); ++ field &= 0x1f; ++ if (!field) ++ dev_cap->max_gso_sz = 0; ++ else ++ dev_cap->max_gso_sz = 1 << field; ++ + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RDMA_OFFSET); + dev_cap->max_rdma_global = 1 << (field & 0x3f); + MLX4_GET(field, outbox, QUERY_DEV_CAP_ACK_DELAY_OFFSET); +@@ -377,6 +385,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev * + dev_cap->max_sq_desc_sz, dev_cap->max_sq_sg); + mlx4_dbg(dev, "Max RQ desc size: %d, max RQ S/G: %d\n", + dev_cap->max_rq_desc_sz, dev_cap->max_rq_sg); ++ mlx4_dbg(dev, "Max GSO size: %d\n", dev_cap->max_gso_sz); + + dump_dev_cap_flags(dev, dev_cap->flags); + +Index: ofed_kernel/drivers/net/mlx4/fw.h +=================================================================== +--- ofed_kernel.orig/drivers/net/mlx4/fw.h 
2008-01-23 15:58:48.837059000 +0200 ++++ ofed_kernel/drivers/net/mlx4/fw.h 2008-01-23 16:05:20.109984000 +0200 +@@ -96,6 +96,7 @@ struct mlx4_dev_cap { + u8 bmme_flags; + u32 reserved_lkey; + u64 max_icm_sz; ++ int max_gso_sz; + }; + + struct mlx4_adapter { +Index: ofed_kernel/drivers/net/mlx4/main.c +=================================================================== +--- ofed_kernel.orig/drivers/net/mlx4/main.c 2008-01-23 15:58:48.841058000 +0200 ++++ ofed_kernel/drivers/net/mlx4/main.c 2008-01-23 16:05:20.115981000 +0200 +@@ -159,6 +159,7 @@ static int mlx4_dev_cap(struct mlx4_dev + dev->caps.page_size_cap = ~(u32) (dev_cap->min_page_sz - 1); + dev->caps.flags = dev_cap->flags; + dev->caps.stat_rate_support = dev_cap->stat_rate_support; ++ dev->caps.max_gso_sz = dev_cap->max_gso_sz; + + return 0; + } +Index: ofed_kernel/include/linux/mlx4/device.h +=================================================================== +--- ofed_kernel.orig/include/linux/mlx4/device.h 2008-01-23 15:58:48.844060000 +0200 ++++ ofed_kernel/include/linux/mlx4/device.h 2008-01-23 16:05:20.138984000 +0200 +@@ -181,6 +181,7 @@ struct mlx4_caps { + u32 flags; + u16 stat_rate_support; + u8 port_width_cap[MLX4_MAX_PORTS + 1]; ++ int max_gso_sz; + }; + + struct mlx4_buf_list { +Index: ofed_kernel/include/linux/mlx4/qp.h +=================================================================== +--- ofed_kernel.orig/include/linux/mlx4/qp.h 2008-01-23 16:01:48.448613000 +0200 ++++ ofed_kernel/include/linux/mlx4/qp.h 2008-01-23 16:05:20.142981000 +0200 +@@ -215,6 +215,11 @@ struct mlx4_wqe_datagram_seg { + __be32 reservd[2]; + }; + ++struct mlx4_lso_seg { ++ __be32 mss_hdr_size; ++ __be32 header[0]; ++}; ++ + struct mlx4_wqe_bind_seg { + __be32 flags1; + __be32 flags2; diff --git a/branches/ConnectX/hw/mlx4/kernel_patches/mlx4_0170_shrinking_wqe.patch b/branches/ConnectX/hw/mlx4/kernel_patches/mlx4_0170_shrinking_wqe.patch new file mode 100644 index 00000000..588881bb --- /dev/null +++ b/branches/ConnectX/hw/mlx4/kernel_patches/mlx4_0170_shrinking_wqe.patch @@ -0,0 +1,509 @@ +commit 8e6b03bb781ee403e2aa3de9b9576ef42d919ce8 +commit c0aa89f0b295dd0c20b2ff2b1d2eca10cdc84f4b +Author: Michael S. Tsirkin +Date: Thu Aug 30 15:51:40 2007 +0300 + + IB/mlx4: shrinking WQE + + ConnectX supports shrinking wqe, such that a single WR can include + multiple units of wqe_shift. This way, WRs can differ in size, and + do not have to be a power of 2 in size, saving memory and speeding up + send WR posting. Unfortunately, if we do this wqe_index field in CQE + can't be used to look up the WR ID anymore, so do this only if + selective signalling is off. + + Further, on 32-bit platforms, we can't use vmap to make + the QP buffer virtually contigious. Thus we have to use + constant-sized WRs to make sure a WR is always fully within + a single page-sized chunk. + + Finally, we use WR with NOP opcode to avoid wrap-around + in the middle of WR. We set NoErrorCompletion bit to avoid getting + completions with error for NOP WRs. Since NEC is only supported + starting with firmware 2.2.232, we use constant-sized WRs + for older firmware. And, since MLX QPs only support SEND, we use + constant-sized WRs in this case. + + When stamping during NOP posting, do stamping following setting of + the NOP wqe valid bit. + + Signed-off-by: Michael S. Tsirkin + Signed-off-by: Jack Morgenstein + +commit 8e6b03bb781ee403e2aa3de9b9576ef42d919ce8 +commit c0aa89f0b295dd0c20b2ff2b1d2eca10cdc84f4b +Author: Michael S. 
Tsirkin +Date: Thu Aug 30 15:51:40 2007 +0300 + + IB/mlx4: shrinking WQE + + ConnectX supports shrinking wqe, such that a single WR can include + multiple units of wqe_shift. This way, WRs can differ in size, and + do not have to be a power of 2 in size, saving memory and speeding up + send WR posting. Unfortunately, if we do this wqe_index field in CQE + can't be used to look up the WR ID anymore, so do this only if + selective signalling is off. + + Further, on 32-bit platforms, we can't use vmap to make + the QP buffer virtually contigious. Thus we have to use + constant-sized WRs to make sure a WR is always fully within + a single page-sized chunk. + + Finally, we use WR with NOP opcode to avoid wrap-around + in the middle of WR. We set NoErrorCompletion bit to avoid getting + completions with error for NOP WRs. Since NEC is only supported + starting with firmware 2.2.232, we use constant-sized WRs + for older firmware. And, since MLX QPs only support SEND, we use + constant-sized WRs in this case. + + When stamping during NOP posting, do stamping following setting of + the NOP wqe valid bit. + + Signed-off-by: Michael S. Tsirkin + Signed-off-by: Jack Morgenstein + +Index: ofed_kernel-2.6.16_sles10/drivers/infiniband/hw/mlx4/cq.c +=================================================================== +--- ofed_kernel-2.6.16_sles10.orig/drivers/infiniband/hw/mlx4/cq.c 2008-01-22 13:19:40.000000000 +0200 ++++ ofed_kernel-2.6.16_sles10/drivers/infiniband/hw/mlx4/cq.c 2008-01-22 13:20:13.000000000 +0200 +@@ -353,6 +353,12 @@ static int mlx4_ib_poll_one(struct mlx4_ + is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == + MLX4_CQE_OPCODE_ERROR; + ++ if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP && ++ is_send)) { ++ printk(KERN_WARNING "Completion for NOP opcode detected!\n"); ++ return -EINVAL; ++ } ++ + if ((be32_to_cpu(cqe->my_qpn) & (1 << 23)) && !is_send) { + /* + * We do not have to take the XRC SRQ table lock here, +@@ -391,8 +397,10 @@ static int mlx4_ib_poll_one(struct mlx4_ + + if (is_send) { + wq = &(*cur_qp)->sq; +- wqe_ctr = be16_to_cpu(cqe->wqe_index); +- wq->tail += (u16) (wqe_ctr - (u16) wq->tail); ++ if (!(*cur_qp)->sq_signal_bits) { ++ wqe_ctr = be16_to_cpu(cqe->wqe_index); ++ wq->tail += (u16) (wqe_ctr - (u16) wq->tail); ++ } + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + ++wq->tail; + } else if (is_xrc_recv) { +Index: ofed_kernel-2.6.16_sles10/drivers/infiniband/hw/mlx4/mlx4_ib.h +=================================================================== +--- ofed_kernel-2.6.16_sles10.orig/drivers/infiniband/hw/mlx4/mlx4_ib.h 2008-01-22 13:19:40.000000000 +0200 ++++ ofed_kernel-2.6.16_sles10/drivers/infiniband/hw/mlx4/mlx4_ib.h 2008-01-22 13:20:13.000000000 +0200 +@@ -136,6 +136,8 @@ struct mlx4_ib_qp { + + u32 doorbell_qpn; + __be32 sq_signal_bits; ++ unsigned sq_next_wqe; ++ int sq_max_wqes_per_wr; + int sq_spare_wqes; + struct mlx4_ib_wq sq; + +Index: ofed_kernel-2.6.16_sles10/drivers/infiniband/hw/mlx4/qp.c +=================================================================== +--- ofed_kernel-2.6.16_sles10.orig/drivers/infiniband/hw/mlx4/qp.c 2008-01-22 13:19:40.000000000 +0200 ++++ ofed_kernel-2.6.16_sles10/drivers/infiniband/hw/mlx4/qp.c 2008-01-22 13:31:45.000000000 +0200 +@@ -30,6 +30,7 @@ + * SOFTWARE. 
+ */ + ++#include + #include + #include + +@@ -97,7 +98,7 @@ static int is_qp0(struct mlx4_ib_dev *de + + static void *get_wqe(struct mlx4_ib_qp *qp, int offset) + { +- if (qp->buf.nbufs == 1) ++ if (BITS_PER_LONG == 64 || qp->buf.nbufs == 1) + return qp->buf.u.direct.buf + offset; + else + return qp->buf.u.page_list[offset >> PAGE_SHIFT].buf + +@@ -116,16 +117,88 @@ static void *get_send_wqe(struct mlx4_ib + + /* + * Stamp a SQ WQE so that it is invalid if prefetched by marking the +- * first four bytes of every 64 byte chunk with 0xffffffff, except for +- * the very first chunk of the WQE. ++ * first four bytes of every 64 byte chunk with ++ * 0x7FFFFFF | (invalid_ownership_value << 31). ++ * ++ * When max WR is than or equal to the WQE size, ++ * as an optimization, we can stamp WQE with 0xffffffff, ++ * and skip the very first chunk of the WQE. + */ +-static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n) ++static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size) + { +- u32 *wqe = get_send_wqe(qp, n); ++ u32 *wqe; + int i; ++ int s; ++ int ind; ++ void *buf; ++ __be32 stamp; ++ ++ s = roundup(size, 1 << qp->sq.wqe_shift); ++ if (qp->sq_max_wqes_per_wr > 1) { ++ for (i = 0; i < s; i += 64) { ++ ind = (i >> qp->sq.wqe_shift) + n; ++ stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) : ++ cpu_to_be32(0xffffffff); ++ buf = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1)); ++ wqe = buf + (i & ((1 << qp->sq.wqe_shift) - 1)); ++ *wqe = stamp; ++ } ++ } else { ++ buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1)); ++ for (i = 64; i < s; i += 64) { ++ wqe = buf + i; ++ *wqe = 0xffffffff; ++ } ++ } ++} ++ ++static void post_nop_wqe(struct mlx4_ib_qp *qp, int n, int size) ++{ ++ struct mlx4_wqe_ctrl_seg *ctrl; ++ struct mlx4_wqe_inline_seg *inl; ++ void *wqe; ++ int s; ++ ++ ctrl = wqe = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1)); ++ s = sizeof(struct mlx4_wqe_ctrl_seg); ++ ++ if (qp->ibqp.qp_type == IB_QPT_UD) { ++ struct mlx4_wqe_datagram_seg *dgram = wqe + sizeof *ctrl; ++ struct mlx4_av *av = (struct mlx4_av *)dgram->av; ++ memset(dgram, 0, sizeof *dgram); ++ av->port_pd = cpu_to_be32((qp->port << 24) | to_mpd(qp->ibqp.pd)->pdn); ++ s += sizeof(struct mlx4_wqe_datagram_seg); ++ } ++ ++ /* Pad the remainder of the WQE with an inline data segment. */ ++ if (size > s) { ++ inl = wqe + s; ++ inl->byte_count = cpu_to_be32(1 << 31 | (size - s - sizeof *inl)); ++ } ++ ctrl->srcrb_flags = 0; ++ ctrl->fence_size = size / 16; ++ /* ++ * Make sure descriptor is fully written before ++ * setting ownership bit (because HW can start ++ * executing as soon as we do). ++ */ ++ wmb(); + +- for (i = 16; i < 1 << (qp->sq.wqe_shift - 2); i += 16) +- wqe[i] = 0xffffffff; ++ ctrl->owner_opcode = cpu_to_be32(MLX4_OPCODE_NOP | MLX4_WQE_CTRL_NEC) | ++ (n & qp->sq.wqe_cnt ? 
cpu_to_be32(1 << 31) : 0); ++ ++ stamp_send_wqe(qp, n + qp->sq_spare_wqes, size); ++} ++ ++/* Post NOP WQE to prevent wrap-around in the middle of WR */ ++static inline unsigned pad_wraparound(struct mlx4_ib_qp *qp, int ind) ++{ ++ unsigned s = qp->sq.wqe_cnt - (ind & (qp->sq.wqe_cnt - 1)); ++ if (unlikely(s < qp->sq_max_wqes_per_wr)) { ++ post_nop_wqe(qp, ind, s << qp->sq.wqe_shift); ++ ind += s; ++ } ++ return ind; + } + + static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type) +@@ -258,6 +331,7 @@ static int set_kernel_sq_size(struct mlx + { + struct ib_qp_cap *cap = &init_attr->cap; + enum ib_qp_type type = init_attr->qp_type; ++ int s; + int reserve = 0; + + /* Sanity check SQ size before proceeding */ +@@ -281,22 +355,69 @@ static int set_kernel_sq_size(struct mlx + reserve = 64; + } + +- qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge * +- sizeof (struct mlx4_wqe_data_seg) + +- reserve, +- cap->max_inline_data + +- sizeof (struct mlx4_wqe_inline_seg)) + +- send_wqe_overhead(type))); +- qp->sq.wqe_shift = max(MLX4_IB_SQ_MIN_WQE_SHIFT, qp->sq.wqe_shift); +- qp->sq.max_gs = ((1 << qp->sq.wqe_shift) -reserve - send_wqe_overhead(type)) / +- sizeof (struct mlx4_wqe_data_seg); ++ s = max(cap->max_send_sge * sizeof (struct mlx4_wqe_data_seg) + reserve, ++ cap->max_inline_data + sizeof (struct mlx4_wqe_inline_seg)) + ++ send_wqe_overhead(type); + + /* +- * We need to leave 2 KB + 1 WQE of headroom in the SQ to +- * allow HW to prefetch. ++ * Hermon supports shrinking wqe, such that a single WR can include ++ * multiple units of wqe_shift. This way, WRs can differ in size, and ++ * do not have to be a power of 2 in size, saving memory and speeding up ++ * send WR posting. Unfortunately, if we do this wqe_index field in CQE ++ * can't be used to look up the WR ID anymore, so do this only if ++ * selective signalling is off. ++ * ++ * Further, on 32-bit platforms, we can't use vmap to make ++ * the QP buffer virtually contigious. Thus we have to use ++ * constant-sized WRs to make sure a WR is always fully within ++ * a single page-sized chunk. ++ * ++ * Finally, we use NOP opcode to avoid wrap-around in the middle of WR. ++ * We set NEC bit to avoid getting completions with error for NOP WRs. ++ * Since NEC is only supported starting with firmware 2.2.232, ++ * we use constant-sized WRs for older firmware. ++ * ++ * And, since MLX QPs only support SEND, we use constant-sized WRs in this ++ * case. ++ * ++ * We look for the smallest value of wqe_shift such that the resulting ++ * number of wqes does not exceed device capabilities. ++ * ++ * We set WQE size to at least 64 bytes, this way stamping invalidates each WQE. + */ +- qp->sq_spare_wqes = MLX4_IB_SQ_HEADROOM(qp->sq.wqe_shift); +- qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr + qp->sq_spare_wqes); ++ if (dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC && ++ qp->sq_signal_bits && BITS_PER_LONG == 64 && ++ type != IB_QPT_SMI && type != IB_QPT_GSI) ++ qp->sq.wqe_shift = ilog2(64); ++ else ++ qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s)); ++ ++ for (;;) { ++ if (1 << qp->sq.wqe_shift > dev->dev->caps.max_sq_desc_sz) ++ return -EINVAL; ++ ++ qp->sq_max_wqes_per_wr = DIV_ROUND_UP(s, 1 << qp->sq.wqe_shift); ++ ++ /* ++ * We need to leave 2 KB + 1 WR of headroom in the SQ to ++ * allow HW to prefetch. 
++ */ ++ qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + qp->sq_max_wqes_per_wr; ++ qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr * ++ qp->sq_max_wqes_per_wr + ++ qp->sq_spare_wqes); ++ ++ if (qp->sq.wqe_cnt <= dev->dev->caps.max_wqes) ++ break; ++ ++ if (qp->sq_max_wqes_per_wr <= 1) ++ return -EINVAL; ++ ++ ++qp->sq.wqe_shift; ++ } ++ ++ qp->sq.max_gs = ((qp->sq_max_wqes_per_wr << qp->sq.wqe_shift) - reserve - ++ send_wqe_overhead(type)) / sizeof (struct mlx4_wqe_data_seg); + + qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + + (qp->sq.wqe_cnt << qp->sq.wqe_shift); +@@ -309,8 +430,7 @@ static int set_kernel_sq_size(struct mlx + } + + cap->max_send_wr = qp->sq.max_post = +- min(qp->sq.wqe_cnt - qp->sq_spare_wqes, +- dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE); ++ (qp->sq.wqe_cnt - qp->sq_spare_wqes) / qp->sq_max_wqes_per_wr; + cap->max_send_sge =min(qp->sq.max_gs, + min(dev->dev->caps.max_sq_sg, + dev->dev->caps.max_rq_sg)); +@@ -360,6 +480,12 @@ static int create_qp_common(struct mlx4_ + qp->rq.tail = 0; + qp->sq.head = 0; + qp->sq.tail = 0; ++ qp->sq_next_wqe = 0; ++ ++ if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ++ qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); ++ else ++ qp->sq_signal_bits = 0; + + err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, + !!init_attr->srq || !!init_attr->xrc_domain , qp); +@@ -454,11 +580,6 @@ static int create_qp_common(struct mlx4_ + */ + qp->doorbell_qpn = swab32(qp->mqp.qpn << 8); + +- if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) +- qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); +- else +- qp->sq_signal_bits = 0; +- + qp->mqp.event = mlx4_ib_qp_event; + + return 0; +@@ -969,7 +1090,7 @@ static int __mlx4_ib_modify_qp(struct ib + ctrl = get_send_wqe(qp, i); + ctrl->owner_opcode = cpu_to_be32(1 << 31); + +- stamp_send_wqe(qp, i); ++ stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift); + } + } + +@@ -1022,6 +1143,7 @@ static int __mlx4_ib_modify_qp(struct ib + qp->rq.tail = 0; + qp->sq.head = 0; + qp->sq.tail = 0; ++ qp->sq_next_wqe = 0; + if (!ibqp->srq && ibqp->qp_type != IB_QPT_XRC) + *qp->db.db = 0; + } +@@ -1356,13 +1478,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp + unsigned long flags; + int nreq; + int err = 0; +- int ind; +- int size; ++ unsigned ind; ++ int uninitialized_var(stamp); ++ int uninitialized_var(size); + int i; + + spin_lock_irqsave(&qp->sq.lock, flags); + +- ind = qp->sq.head; ++ ind = qp->sq_next_wqe; + + for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { +@@ -1378,7 +1501,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp + } + + ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1)); +- qp->sq.wrid[ind & (qp->sq.wqe_cnt - 1)] = wr->wr_id; ++ qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = wr->wr_id; + + ctrl->srcrb_flags = + (wr->send_flags & IB_SEND_SIGNALED ? +@@ -1511,16 +1634,23 @@ int mlx4_ib_post_send(struct ib_qp *ibqp + ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] | + (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0); + ++ stamp = ind + qp->sq_spare_wqes; ++ ind += DIV_ROUND_UP(size * 16, 1 << qp->sq.wqe_shift); ++ + /* + * We can improve latency by not stamping the last + * send queue WQE until after ringing the doorbell, so + * only stamp here if there are still more WQEs to post. ++ * ++ * Same optimization applies to padding with NOP wqe ++ * in case of WQE shrinking (used to prevent wrap-around ++ * in the middle of WR). 
+ */ +- if (wr->next) +- stamp_send_wqe(qp, (ind + qp->sq_spare_wqes) & +- (qp->sq.wqe_cnt - 1)); ++ if (wr->next) { ++ stamp_send_wqe(qp, stamp, size * 16); ++ ind = pad_wraparound(qp, ind); ++ } + +- ++ind; + } + + out: +@@ -1542,8 +1672,10 @@ out: + */ + mmiowb(); + +- stamp_send_wqe(qp, (ind + qp->sq_spare_wqes - 1) & +- (qp->sq.wqe_cnt - 1)); ++ stamp_send_wqe(qp, stamp, size * 16); ++ ++ ind = pad_wraparound(qp, ind); ++ qp->sq_next_wqe = ind; + } + + spin_unlock_irqrestore(&qp->sq.lock, flags); +Index: ofed_kernel-2.6.16_sles10/drivers/net/mlx4/alloc.c +=================================================================== +--- ofed_kernel-2.6.16_sles10.orig/drivers/net/mlx4/alloc.c 2008-01-22 13:19:40.000000000 +0200 ++++ ofed_kernel-2.6.16_sles10/drivers/net/mlx4/alloc.c 2008-01-22 13:20:13.000000000 +0200 +@@ -152,6 +152,19 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, + + memset(buf->u.page_list[i].buf, 0, PAGE_SIZE); + } ++ ++ if (BITS_PER_LONG == 64) { ++ struct page **pages; ++ pages = kmalloc(sizeof *pages * buf->nbufs, GFP_KERNEL); ++ if (!pages) ++ goto err_free; ++ for (i = 0; i < buf->nbufs; ++i) ++ pages[i] = virt_to_page(buf->u.page_list[i].buf); ++ buf->u.direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL); ++ kfree(pages); ++ if (!buf->u.direct.buf) ++ goto err_free; ++ } + } + + return 0; +@@ -171,6 +184,9 @@ void mlx4_buf_free(struct mlx4_dev *dev, + dma_free_coherent(&dev->pdev->dev, size, buf->u.direct.buf, + buf->u.direct.map); + else { ++ if (BITS_PER_LONG == 64) ++ vunmap(buf->u.direct.buf); ++ + for (i = 0; i < buf->nbufs; ++i) + if (buf->u.page_list[i].buf) + dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, +Index: ofed_kernel-2.6.16_sles10/include/linux/mlx4/device.h +=================================================================== +--- ofed_kernel-2.6.16_sles10.orig/include/linux/mlx4/device.h 2008-01-22 13:19:40.000000000 +0200 ++++ ofed_kernel-2.6.16_sles10/include/linux/mlx4/device.h 2008-01-22 13:20:13.000000000 +0200 +@@ -134,6 +134,11 @@ enum { + MLX4_STAT_RATE_OFFSET = 5 + }; + ++static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor) ++{ ++ return (major << 32) | (minor << 16) | subminor; ++} ++ + struct mlx4_caps { + u64 fw_ver; + int num_ports; +@@ -193,7 +198,7 @@ struct mlx4_buf_list { + }; + + struct mlx4_buf { +- union { ++ struct { + struct mlx4_buf_list direct; + struct mlx4_buf_list *page_list; + } u; +Index: ofed_kernel-2.6.16_sles10/include/linux/mlx4/qp.h +=================================================================== +--- ofed_kernel-2.6.16_sles10.orig/include/linux/mlx4/qp.h 2008-01-22 13:19:40.000000000 +0200 ++++ ofed_kernel-2.6.16_sles10/include/linux/mlx4/qp.h 2008-01-22 13:20:13.000000000 +0200 +@@ -155,7 +155,11 @@ struct mlx4_qp_context { + u32 reserved5[10]; + }; + ++/* Which firmware version adds support for NEC (NoErrorCompletion) bit */ ++#define MLX4_FW_VER_WQE_CTRL_NEC mlx4_fw_ver(2, 2, 232) ++ + enum { ++ MLX4_WQE_CTRL_NEC = 1 << 29, + MLX4_WQE_CTRL_FENCE = 1 << 6, + MLX4_WQE_CTRL_CQ_UPDATE = 3 << 2, + MLX4_WQE_CTRL_SOLICITED = 1 << 1, diff --git a/branches/ConnectX/hw/mlx4/readme.txt b/branches/ConnectX/hw/mlx4/readme.txt new file mode 100644 index 00000000..d7cfd89f --- /dev/null +++ b/branches/ConnectX/hw/mlx4/readme.txt @@ -0,0 +1,59 @@ + Mellanox ConnectX HCA Support. + +1. General + This directory contains a new driver, supporting Mellanox Tech. ConnectX HCAs. + +2. Driver description + The driver was not developed from scratch. It was ported from Linux OFED multiprotocol MLX4 driver. 
+ The Linux-wise look of the code was kept intentionally to facilitate applying patches from the Linux driver.
+ The driver SW consists of the MLX4_HCA and MLX4_BUS drivers and the MLX4U DLL.
+
+2.1. MLX4_HCA.SYS (hw\mlx4\kernel\hca)
+ It is a shim that adjusts IBBUS to the low-level driver. It was originally written in the WDF model,
+ but I didn't find a simple way to connect it to IBBUS with its current way of creating ULP devices,
+ so it was temporarily returned to the WDM model.
+ The true solution is to convert IBBUS into an upper filter driver of MLX4_HCA and MTHCA,
+ which, hopefully, will be done in the next release.
+ The relations of MLX4_HCA with IBBUS are exactly the same as those of the MTHCA driver.
+
+2.2. MLX4_BUS.SYS (hw\mlx4\kernel: \core, \ib, \net and \bus)
+ It is the low-level driver, written in the WDF model.
+ It starts when the PnP Manager detects a ConnectX HCA, initializes the chip and creates the MLX4_HCA virtual device.
+ It exports an interface for the MLX4_HCA driver, which the latter obtains on startup and uses to implement IBBUS requests.
+
+2.3. MLX4U.DLL (hw\mlx4\user)
+ Does the same as MTHCAU.DLL.
+
+3. Driver current state
+ The driver is currently in the debugging phase. A lot of things do not work yet.
+ Things that have been run are:
+ - vstat, opensm;
+ - performance tests;
+ - IPoIB, WSD.
+
+4. Building the driver
+ We are now changing the stack to enable building it with both the DDK and the WDK. This work is not finished yet.
+ For now all development is done with WDK 6000 on the x64 platform.
+ All kernel components and most user-mode ones compile with this WDK; the remaining applications are compiled with the DDK.
+
+5. Driver installation
+ So far the driver has only been installed manually (with the help of Device Manager).
+ After installing the MLX4_BUS and MLX4_HCA drivers, IBBUS can be added by running
+ devman install ib_bus.inf {94f41ced-78eb-407c-b5df-958040af0fd8}
+
+6. Known problems
+
+6.1. Not implemented
+ - MSI-X support.
+ - Working with DMA addresses rather than physical ones (this requires a very serious change!);
+
+6.2. Performance
+ - The command go-bit timeout is set to CMD_WAIT_USECS; its influence is not clear.
+ - The radix tree is implemented via COMPLIB calls that seem otherwise unused. It can be slow and even buggy.
+ - Several things could be done in built-in assembler (e.g., bit operations like fls()).
+
+6.3. WPP support is disabled. Problems:
+ - at compilation phase;
+ - Linux prints in MLX4_BUS are not supported by WPP (mlx4_err, mlx4_dbg, dev_err, dev_info ...);
+
+6.4 (General) Several places are marked with '// TODO:'. They have to be reviewed from time to time, especially upon problems.
diff --git a/branches/ConnectX/hw/mlx4/user/Makefile b/branches/ConnectX/hw/mlx4/user/Makefile
new file mode 100644
index 00000000..bffacaa7
--- /dev/null
+++ b/branches/ConnectX/hw/mlx4/user/Makefile
@@ -0,0 +1,7 @@
+#
+# DO NOT EDIT THIS FILE!!! Edit .\sources. if you want to add a new source
+# file to this component. This file merely indirects to the real make file
+# that is shared by all the driver components of the OpenIB Windows project.
+#
+
+!INCLUDE ..\..\..\inc\openib.def
diff --git a/branches/ConnectX/hw/mlx4/user/SOURCES b/branches/ConnectX/hw/mlx4/user/SOURCES
new file mode 100644
index 00000000..7f5e5576
--- /dev/null
+++ b/branches/ConnectX/hw/mlx4/user/SOURCES
@@ -0,0 +1,65 @@
+TRUNK=..\..\..
+ +!if $(FREEBUILD) +TARGETNAME=mlx4u +!else +TARGETNAME=mlx4ud +!endif + +TARGETPATH=$(TRUNK)\bin\user\obj$(BUILD_ALT_DIR) +TARGETTYPE=DYNLINK +DLLDEF=$O\mlx4.def +USE_MSVCRT=1 +DLLENTRY=DllMain + +!if $(FREEBUILD) +#ENABLE_EVENT_TRACING=1 +!else +#ENABLE_EVENT_TRACING=1 +!endif + +SOURCES= \ + buf.c \ + cq.c \ + dbrec.c \ + mlx4.c \ + mlx4_debug.c \ + qp.c \ + srq.c \ + verbs.c + +INCLUDES= \ + ..\inc; \ + $(TRUNK)\inc\user; \ + $(TRUNK)\inc\complib; \ + $(TRUNK)\inc\user\complib; \ + $(TRUNK)\inc; \ + +USER_C_FLAGS=$(USER_C_FLAGS) /DCL_NO_TRACK_MEM + +TARGETLIBS=\ + $(SDK_LIB_PATH)\user32.lib \ + $(SDK_LIB_PATH)\kernel32.lib \ + $(SDK_LIB_PATH)\Advapi32.lib \ +!if $(FREEBUILD) + $(TARGETPATH)\*\complib.lib \ + $(TARGETPATH)\*\ibal.lib +!else + $(TARGETPATH)\*\complibd.lib \ + $(TARGETPATH)\*\ibald.lib +!endif + +#LINKER_FLAGS=/MAP /MAPINFO:LINES + +!IFDEF ENABLE_EVENT_TRACING + +C_DEFINES = $(C_DEFINES) -DEVENT_TRACING -DWPP_OLDCC + + +RUN_WPP= $(SOURCES) -ext:.c.h -dll\ + -scan:mlx4_debug.h \ + -func:MLX4_PRINT(LEVEL,FLAGS,(MSG,...)) \ + -func:MLX4_PRINT_EXIT(LEVEL,FLAGS,(MSG,...)) +!ENDIF + +MSC_WARNING_LEVEL= /W4 diff --git a/branches/ConnectX/hw/mlx4/user/buf.c b/branches/ConnectX/hw/mlx4/user/buf.c new file mode 100644 index 00000000..a54b38dd --- /dev/null +++ b/branches/ConnectX/hw/mlx4/user/buf.c @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2006, 2007 Cisco, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "mlx4.h" + +int mlx4_alloc_buf(struct mlx4_buf *buf, int size, int page_size) +{ + int ret; + + ret = posix_memalign(&buf->buf, page_size, align(size, page_size)); + if (ret) + return ret; + + buf->length = size; + + return 0; +} + +void mlx4_free_buf(struct mlx4_buf *buf) +{ + VirtualFree(buf->buf, 0, MEM_RELEASE); +} diff --git a/branches/ConnectX/hw/mlx4/user/cq.c b/branches/ConnectX/hw/mlx4/user/cq.c new file mode 100644 index 00000000..0a4d6a80 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/user/cq.c @@ -0,0 +1,481 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2006, 2007 Cisco Systems. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "mlx4.h" +#include "doorbell.h" +#include "mlx4_debug.h" + +#if defined(EVENT_TRACING) +#include "cq.tmh" +#endif + +enum { + MLX4_CQ_DOORBELL = 0x20 +}; + +enum { + CQ_OK = 0, + CQ_EMPTY = -1, + CQ_POLL_ERR = -2 +}; + +#define MLX4_CQ_DB_REQ_NOT_SOL (1 << 24) +#define MLX4_CQ_DB_REQ_NOT (2 << 24) + +enum { + MLX4_CQE_OWNER_MASK = 0x80, + MLX4_CQE_IS_SEND_MASK = 0x40, + MLX4_CQE_OPCODE_MASK = 0x1f +}; + +enum { + MLX4_CQE_SYNDROME_LOCAL_LENGTH_ERR = 0x01, + MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR = 0x02, + MLX4_CQE_SYNDROME_LOCAL_PROT_ERR = 0x04, + MLX4_CQE_SYNDROME_WR_FLUSH_ERR = 0x05, + MLX4_CQE_SYNDROME_MW_BIND_ERR = 0x06, + MLX4_CQE_SYNDROME_BAD_RESP_ERR = 0x10, + MLX4_CQE_SYNDROME_LOCAL_ACCESS_ERR = 0x11, + MLX4_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR = 0x12, + MLX4_CQE_SYNDROME_REMOTE_ACCESS_ERR = 0x13, + MLX4_CQE_SYNDROME_REMOTE_OP_ERR = 0x14, + MLX4_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR = 0x15, + MLX4_CQE_SYNDROME_RNR_RETRY_EXC_ERR = 0x16, + MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR = 0x22, +}; + +struct mlx4_cqe { + uint32_t my_qpn; + uint32_t immed_rss_invalid; + uint32_t g_mlpath_rqpn; + uint8_t sl; + uint8_t reserved1; + uint16_t rlid; + uint32_t reserved2; + uint32_t byte_cnt; + uint16_t wqe_index; + uint16_t checksum; + uint8_t reserved3[3]; + uint8_t owner_sr_opcode; +}; + +struct mlx4_err_cqe { + uint32_t my_qpn; + uint32_t reserved1[5]; + uint16_t wqe_index; + uint8_t vendor_err; + uint8_t syndrome; + uint8_t reserved2[3]; + uint8_t owner_sr_opcode; +}; + +static struct mlx4_cqe *get_cqe(struct mlx4_cq *cq, int entry) +{ + return (struct mlx4_cqe *)(cq->buf.buf + entry * MLX4_CQ_ENTRY_SIZE); +} + +static void *get_sw_cqe(struct mlx4_cq *cq, int n) +{ + struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibv_cq.cqe); + + return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^ + !!(n & (cq->ibv_cq.cqe + 1))) ? 
NULL : cqe; +} + +static struct mlx4_cqe *next_cqe_sw(struct mlx4_cq *cq) +{ + return get_sw_cqe(cq, cq->cons_index); +} + +static void update_cons_index(struct mlx4_cq *cq) +{ + *cq->set_ci_db = htonl(cq->cons_index & 0xffffff); +} + +static void mlx4_handle_error_cqe(struct mlx4_err_cqe *cqe, ib_wc_t *wc) +{ + if (cqe->syndrome == MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR) + printf(PFX "local QP operation err " + "(QPN %06x, WQE index %x, vendor syndrome %02x, " + "opcode = %02x)\n", + htonl(cqe->my_qpn), htonl(cqe->wqe_index), + cqe->vendor_err, + cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK); + + switch (cqe->syndrome) { + case MLX4_CQE_SYNDROME_LOCAL_LENGTH_ERR: + wc->status = IB_WCS_LOCAL_LEN_ERR; + break; + case MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR: + wc->status = IB_WCS_LOCAL_OP_ERR; + break; + case MLX4_CQE_SYNDROME_LOCAL_PROT_ERR: + wc->status = IB_WCS_LOCAL_PROTECTION_ERR; + break; + case MLX4_CQE_SYNDROME_WR_FLUSH_ERR: + wc->status = IB_WCS_WR_FLUSHED_ERR; + break; + case MLX4_CQE_SYNDROME_MW_BIND_ERR: + wc->status = IB_WCS_MEM_WINDOW_BIND_ERR; + break; + case MLX4_CQE_SYNDROME_BAD_RESP_ERR: + wc->status = IB_WCS_BAD_RESP_ERR; + break; + case MLX4_CQE_SYNDROME_LOCAL_ACCESS_ERR: + wc->status = IB_WCS_LOCAL_ACCESS_ERR; + break; + case MLX4_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR: + wc->status = IB_WCS_REM_INVALID_REQ_ERR; + break; + case MLX4_CQE_SYNDROME_REMOTE_ACCESS_ERR: + wc->status = IB_WCS_REM_ACCESS_ERR; + break; + case MLX4_CQE_SYNDROME_REMOTE_OP_ERR: + wc->status = IB_WCS_REM_OP_ERR; + break; + case MLX4_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR: + wc->status = IB_WCS_TIMEOUT_RETRY_ERR; + break; + case MLX4_CQE_SYNDROME_RNR_RETRY_EXC_ERR: + wc->status = IB_WCS_RNR_RETRY_ERR; + break; + case MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR: + wc->status = IB_WCS_REM_ABORT_ERR; + break; + } + + wc->vendor_specific = cqe->vendor_err; +} + +static int mlx4_poll_one(struct mlx4_cq *cq, struct mlx4_qp **cur_qp, ib_wc_t *wc) +{ + struct mlx4_wq *wq; + struct mlx4_cqe *cqe; + struct mlx4_srq *srq = NULL; + uint32_t qpn; + uint32_t srqn; + uint16_t wqe_index; + int is_error; + int is_send; + int is_xrc_recv = 0; + + cqe = next_cqe_sw(cq); + if (!cqe) + return CQ_EMPTY; + + ++cq->cons_index; + + /* + * Make sure we read CQ entry contents after we've checked the + * ownership bit. + */ + rmb(); + + qpn = ntohl(cqe->my_qpn); + + is_send = cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK; + is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == + MLX4_CQE_OPCODE_ERROR; + + if (qpn & MLX4_XRC_QPN_BIT && !is_send) { + srqn = ntohl(cqe->g_mlpath_rqpn) & 0xffffff; + /* + * We do not have to take the XRC SRQ table lock here, + * because CQs will be locked while XRC SRQs are removed + * from the table. + */ + srq = mlx4_find_xrc_srq(to_mctx(cq->ibv_cq.context), srqn); + if (!srq) + return CQ_POLL_ERR; + is_xrc_recv = 1; + } else if (!*cur_qp || (qpn & 0xffffff) != (*cur_qp)->ibv_qp.qp_num) { + /* + * We do not have to take the QP table lock here, + * because CQs will be locked while QPs are removed + * from the table. 
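+		 * The QP found here is also cached in *cur_qp across poll
+		 * iterations, so the table lookup is repeated only when the
+		 * QPN in the CQE differs from the previously polled one.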
+ */ + *cur_qp = mlx4_find_qp(to_mctx(cq->ibv_cq.context), + qpn & 0xffffff); + if (!*cur_qp) + return CQ_POLL_ERR; + } + + if (is_send) { + wq = &(*cur_qp)->sq; + wqe_index = ntohs(cqe->wqe_index); + wq->tail += (uint16_t) (wqe_index - (uint16_t) wq->tail); + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + ++wq->tail; + } else if (is_xrc_recv) { + wqe_index = htons(cqe->wqe_index); + wc->wr_id = srq->wrid[wqe_index]; + mlx4_free_srq_wqe(srq, wqe_index); + } else if ((*cur_qp)->ibv_qp.srq) { + srq = to_msrq((*cur_qp)->ibv_qp.srq); + wqe_index = htons(cqe->wqe_index); + wc->wr_id = srq->wrid[wqe_index]; + mlx4_free_srq_wqe(srq, wqe_index); + } else { + wq = &(*cur_qp)->rq; + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + ++wq->tail; + } + + if (is_send) { + wc->recv.ud.recv_opt = 0; + switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) { + case MLX4_OPCODE_RDMA_WRITE_IMM: + wc->recv.ud.recv_opt |= IB_RECV_OPT_IMMEDIATE; + case MLX4_OPCODE_RDMA_WRITE: + wc->wc_type = IB_WC_RDMA_WRITE; + break; + case MLX4_OPCODE_SEND_IMM: + wc->recv.ud.recv_opt |= IB_RECV_OPT_IMMEDIATE; + case MLX4_OPCODE_SEND: + wc->wc_type = IB_WC_SEND; + break; + case MLX4_OPCODE_RDMA_READ: + wc->wc_type = IB_WC_RDMA_READ; + wc->length = ntohl(cqe->byte_cnt); + break; + case MLX4_OPCODE_ATOMIC_CS: + wc->wc_type = IB_WC_COMPARE_SWAP; + wc->length = 8; + break; + case MLX4_OPCODE_ATOMIC_FA: + wc->wc_type = IB_WC_FETCH_ADD; + wc->length = 8; + break; + case MLX4_OPCODE_BIND_MW: + wc->wc_type = IB_WC_MW_BIND; + break; + default: + /* assume it's a send completion */ + wc->wc_type = IB_WC_SEND; + break; + } + } else { + wc->length = ntohl(cqe->byte_cnt); + + switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) { + case MLX4_RECV_OPCODE_RDMA_WRITE_IMM: + wc->wc_type = IB_WC_RECV; + wc->recv.ud.recv_opt = IB_RECV_OPT_IMMEDIATE; + wc->recv.ud.immediate_data = cqe->immed_rss_invalid; + break; + case MLX4_RECV_OPCODE_SEND: + wc->wc_type = IB_WC_RECV; + wc->recv.ud.recv_opt = 0; + break; + case MLX4_RECV_OPCODE_SEND_IMM: + wc->wc_type = IB_WC_RECV; + wc->recv.ud.recv_opt = IB_RECV_OPT_IMMEDIATE; + wc->recv.ud.immediate_data = cqe->immed_rss_invalid; + break; + } + + wc->recv.ud.remote_lid = cqe->rlid; + wc->recv.ud.remote_sl = cqe->sl >> 4; + wc->recv.ud.remote_qp = cqe->g_mlpath_rqpn & 0xffffff00; + wc->recv.ud.path_bits = (uint8_t)(cqe->g_mlpath_rqpn & 0x7f); + wc->recv.ud.recv_opt |= cqe->g_mlpath_rqpn & 0x080 ? 
IB_RECV_OPT_GRH_VALID : 0; + wc->recv.ud.pkey_index = (uint16_t)(ntohl(cqe->immed_rss_invalid) & 0x7f); + } + + if (is_error) + mlx4_handle_error_cqe((struct mlx4_err_cqe *) cqe, wc); + else + wc->status = IB_WCS_SUCCESS; + + MLX4_PRINT( TRACE_LEVEL_INFORMATION, MLX4_DBG_CQ, ("qpn %#x, wr_id %#I64x, ix %d, cons_index %d \n", + qpn, wc->wr_id, ntohs(cqe->wqe_index), cq->cons_index - 1 )); + + return CQ_OK; +} + +ib_api_status_t +mlx4_poll_cq( + IN const void* h_cq, + IN OUT ib_wc_t** const pp_free_wclist, + OUT ib_wc_t** const pp_done_wclist) +{ + struct mlx4_cq *cq = to_mcq((struct ibv_cq *)h_cq); + struct mlx4_qp *qp = NULL; + ib_wc_t *wc_p, **next_pp; + int npolled = 0; + int err = CQ_OK; + ib_api_status_t status = IB_SUCCESS; + + pthread_spin_lock(&cq->lock); + + // loop through CQ + next_pp = pp_done_wclist; + wc_p = *pp_free_wclist; + while( wc_p ) { + err = mlx4_poll_one(cq, &qp, wc_p); + if (err != CQ_OK) + break; + + // prepare for the next step + *next_pp = wc_p; + next_pp = &wc_p->p_next; + wc_p = wc_p->p_next; + ++npolled; + } + + // prepare the results + *pp_free_wclist = wc_p; /* Set the head of the free list. */ + *next_pp = NULL; /* Clear the tail of the done list. */ + + if (npolled) + update_cons_index(cq); + + pthread_spin_unlock(&cq->lock); + + if (err == CQ_POLL_ERR) + status = IB_ERROR; + else if (err == CQ_EMPTY && npolled == 0 ) + status = IB_NOT_FOUND; + + return status; +} + +ib_api_status_t +mlx4_arm_cq ( + IN const void *h_cq, + IN const boolean_t solicited) +{ + struct ibv_cq *ibvcq = (struct ibv_cq *)h_cq; + struct mlx4_cq *cq = to_mcq(ibvcq); + uint32_t doorbell[2]; + uint32_t sn; + uint32_t ci; + uint32_t cmd; + + sn = cq->arm_sn & 3; + ci = cq->cons_index & 0xffffff; + cmd = solicited ? MLX4_CQ_DB_REQ_NOT_SOL : MLX4_CQ_DB_REQ_NOT; + + *cq->arm_db = htonl(sn << 28 | cmd | ci); + + /* + * Make sure that the doorbell record in host memory is + * written before ringing the doorbell via PCI MMIO. + */ + wmb(); + + doorbell[0] = htonl(sn << 28 | cmd | cq->cqn); + doorbell[1] = htonl(ci); + + mlx4_write64(doorbell, to_mctx(ibvcq->context), MLX4_CQ_DOORBELL); + + return IB_SUCCESS; +} + +#if 0 +// this function could be called in Windows +// we do it in kernel +void mlx4_cq_event(struct ibv_cq *cq) +{ + to_mcq(cq)->arm_sn++; +} +#endif + +void mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq) +{ + struct mlx4_cqe *cqe, *dest; + uint32_t prod_index; + uint8_t owner_bit; + int nfreed = 0; + int is_xrc_srq = 0; + + if (srq && srq->ibv_srq.xrc_cq) + is_xrc_srq = 1; + + pthread_spin_lock(&cq->lock); + + /* + * First we need to find the current producer index, so we + * know where to start cleaning from. It doesn't matter if HW + * adds new entries after this loop -- the QP we're worried + * about is already in RESET, so the new entries won't come + * from our QP and therefore don't need to be checked. + */ + for (prod_index = cq->cons_index; get_sw_cqe(cq, prod_index); ++prod_index) + if (prod_index == cq->cons_index + cq->ibv_cq.cqe) + break; + + /* + * Now sweep backwards through the CQ, removing CQ entries + * that match our QP by copying older entries on top of them. 
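+	 * The ownership bit of each overwritten slot is preserved below,
+	 * so the hardware's view of which entries it owns is not disturbed.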
+	 */
+	while ((int) --prod_index - (int) cq->cons_index >= 0) {
+		cqe = get_cqe(cq, prod_index & cq->ibv_cq.cqe);
+		if (is_xrc_srq &&
+		    ((ntohl(cqe->g_mlpath_rqpn) & 0xffffff) == srq->srqn) &&
+		    !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK)) {
+			mlx4_free_srq_wqe(srq, ntohs(cqe->wqe_index));
+			++nfreed;
+		} else if ((ntohl(cqe->my_qpn) & 0xffffff) == qpn) {
+			if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
+				mlx4_free_srq_wqe(srq, ntohs(cqe->wqe_index));
+			++nfreed;
+		} else if (nfreed) {
+			dest = get_cqe(cq, (prod_index + nfreed) & cq->ibv_cq.cqe);
+			owner_bit = (uint8_t)(dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK);
+			memcpy(dest, cqe, sizeof *cqe);
+			dest->owner_sr_opcode = (uint8_t)(owner_bit |
+				(dest->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK));
+		}
+	}
+
+	if (nfreed) {
+		cq->cons_index += nfreed;
+		/*
+		 * Make sure update of buffer contents is done before
+		 * updating consumer index.
+		 */
+		wmb();
+		update_cons_index(cq);
+	}
+
+	pthread_spin_unlock(&cq->lock);
+}
+
+void mlx4_cq_resize_copy_cqes(struct mlx4_cq *cq, void *buf, int old_cqe)
+{
+	UNREFERENCED_PARAMETER(cq);
+	UNREFERENCED_PARAMETER(buf);
+	UNREFERENCED_PARAMETER(old_cqe);
+}
diff --git a/branches/ConnectX/hw/mlx4/user/dbrec.c b/branches/ConnectX/hw/mlx4/user/dbrec.c
new file mode 100644
index 00000000..d4ba0659
--- /dev/null
+++ b/branches/ConnectX/hw/mlx4/user/dbrec.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "mlx4.h"
+
+#pragma warning( disable:4200 )
+struct mlx4_db_page {
+	struct mlx4_db_page	       *prev, *next;
+	struct mlx4_buf			buf;
+	int				num_db;
+	int				use_cnt;
+	unsigned long			free[];
+};
+#pragma warning( default:4200 )
+
+static const int db_size[] = {
+	8, // MLX4_DB_TYPE_CQ
+	4, // MLX4_DB_TYPE_RQ
+};
+
+static struct mlx4_db_page *__add_page(struct mlx4_context *context,
+				       enum mlx4_db_type type)
+{
+	struct mlx4_db_page *page;
+	int ps = context->ibv_ctx.page_size;
+	int pp;
+	uint32_t i;
+
+	pp = ps / db_size[type];
+
+	page = malloc(sizeof *page + pp / 8);
+	if (!page)
+		return NULL;
+
+	if (mlx4_alloc_buf(&page->buf, ps, ps)) {
+		free(page);
+		return NULL;
+	}
+
+	page->num_db  = pp;
+	page->use_cnt = 0;
+	for (i = 0; i < pp / (sizeof (long) * 8); ++i)
+		page->free[i] = (unsigned long)~0;
+
+	page->prev = NULL;
+	page->next = context->db_list[type];
+	context->db_list[type] = page;
+	if (page->next)
+		page->next->prev = page;
+
+	return page;
+}
+
+uint32_t *mlx4_alloc_db(struct mlx4_context *context, enum mlx4_db_type type)
+{
+	struct mlx4_db_page *page;
+	uint32_t *db = NULL;
+	int i, j;
+
+	pthread_mutex_lock(&context->db_list_mutex);
+
+	for (page = context->db_list[type]; page; page = page->next)
+		if (page->use_cnt < page->num_db)
+			goto found;
+
+	page = __add_page(context, type);
+	if (!page)
+		goto out;
+
+found:
+	++page->use_cnt;
+
+	for (i = 0; !page->free[i]; ++i)
+		/* nothing */;
+
+	j = ffsl(page->free[i]);
+	page->free[i] &= ~(1UL << (j - 1));
+	db = (uint32_t *)(page->buf.buf + (i * 8 * sizeof (long) + (j - 1)) * db_size[type]);
+
+out:
+	pthread_mutex_unlock(&context->db_list_mutex);
+
+	return db;
+}
+
+void mlx4_free_db(struct mlx4_context *context, enum mlx4_db_type type, uint32_t *db)
+{
+	struct mlx4_db_page *page;
+	int ps = context->ibv_ctx.page_size;
+	int i;
+
+	pthread_mutex_lock(&context->db_list_mutex);
+
+	for (page = context->db_list[type]; page; page = page->next)
+		if (((uintptr_t) db & ~(ps - 1)) == (uintptr_t) page->buf.buf)
+			break;
+
+	if (!page)
+		goto out;
+
+	i = (int)(((uint8_t *) db - page->buf.buf) / db_size[type]);
+	page->free[i / (8 * sizeof (long))] |= 1UL << (i % (8 * sizeof (long)));
+
+	if (!--page->use_cnt) {
+		if (page->prev)
+			page->prev->next = page->next;
+		else
+			context->db_list[type] = page->next;
+		if (page->next)
+			page->next->prev = page->prev;
+
+		mlx4_free_buf(&page->buf);
+		free(page);
+	}
+
+out:
+	pthread_mutex_unlock(&context->db_list_mutex);
+}
diff --git a/branches/ConnectX/hw/mlx4/user/doorbell.h b/branches/ConnectX/hw/mlx4/user/doorbell.h
new file mode 100644
index 00000000..ff0c1965
--- /dev/null
+++ b/branches/ConnectX/hw/mlx4/user/doorbell.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2007 Cisco, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef DOORBELL_H +#define DOORBELL_H + +#ifdef _WIN64 + +static inline void mlx4_write64(uint32_t val[2], struct mlx4_context *ctx, int offset) +{ + *(volatile uint64_t *) (ctx->uar + offset) = *(volatile uint64_t *)val; +} + +#else + +static inline void mlx4_write64(uint32_t val[2], struct mlx4_context *ctx, int offset) +{ + pthread_spin_lock(&ctx->uar_lock); + *(volatile uint32_t *) (ctx->uar + offset) = val[0]; + *(volatile uint32_t *) (ctx->uar + offset + 4) = val[1]; + pthread_spin_unlock(&ctx->uar_lock); +} + +#endif + +#endif /* DOORBELL_H */ diff --git a/branches/ConnectX/hw/mlx4/user/l2w.h b/branches/ConnectX/hw/mlx4/user/l2w.h new file mode 100644 index 00000000..2c9cbfe7 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/user/l2w.h @@ -0,0 +1,151 @@ +#ifndef MLX4_L2W_H +#define MLX4_L2W_H + +// =========================================== +// INCLUDES +// =========================================== + +// OS +#include +#include + +#include + +#include +#include +#include +#include +#include + + +// =========================================== +// SUBSTITUTIONS +// =========================================== + +// Memory +#define memset cl_memset +#define memclr cl_memclr +#define memcpy cl_memcpy +#define malloc cl_malloc +#define calloc(x,y) cl_zalloc((x)*(y)) +#define free cl_free + +// ByteSwap +#define htons cl_hton16 +#define htonl cl_hton32 +#define htonll cl_hton64 + +#define ntohs cl_ntoh16 +#define ntohl cl_ntoh32 +#define ntohll cl_ntoh64 + +// Synchronization +#define pthread_mutex_t HANDLE +#define pthread_spinlock_t cl_spinlock_t + +#define pthread_spin_init(x,y) cl_spinlock_init(x) +#define pthread_spin_lock cl_spinlock_acquire +#define pthread_spin_unlock cl_spinlock_release + + +// =========================================== +// LITERALS +// =========================================== + + +// =========================================== +// TYPES +// =========================================== + +typedef uint8_t __u8; +typedef uint16_t __u16; +typedef uint32_t __u32; +typedef uint64_t __u64; + +typedef int32_t __s32; + + +// =========================================== +// MACROS +// =========================================== + + +// =========================================== +// FUNCTIONS +// =========================================== + +static inline int posix_memalign(void **memptr, int alignment, int size) +{ + UNREFERENCED_PARAMETER(alignment); + + *memptr = VirtualAlloc( NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE ); + if (*memptr) + return 0; + else + return ENOMEM; +} + +static inline int ffsl(uint32_t x) +{ + int r = 0; + + if (!x) + return 0; + if (!(x & 0x0000ffffu)) { + x >>= 16; + r += 16; + } + if (!(x & 0x000000ffu)) { + x >>= 8; + r += 8; + } + if (!(x & 0x0000000fu)) { + x >>= 4; + r += 4; + } + if (!(x & 0x000000003u)) { + x 
>>= 2; + r += 2; + } + if (!(x & 0x00000001u)) { + x >>= 1; + r += 1; + } + return r+1; +} + +static inline void pthread_mutex_lock(HANDLE *mutex) +{ + WaitForSingleObject(*mutex, INFINITE); +} + +static inline void pthread_mutex_unlock(HANDLE *mutex) +{ + ReleaseMutex(*mutex); +} + +// =========================================== +// ARCHITECTURE DEFINITIONS +// =========================================== + +/* + * Architecture-specific defines. Currently, an architecture is + * required to implement the following operations: + * + * mb() - memory barrier. No loads or stores may be reordered across + * this macro by either the compiler or the CPU. + * rmb() - read memory barrier. No loads may be reordered across this + * macro by either the compiler or the CPU. + * wmb() - write memory barrier. No stores may be reordered across + * this macro by either the compiler or the CPU. + * wc_wmb() - flush write combine buffers. No write-combined writes + * will be reordered across this macro by either the compiler or + * the CPU. + */ + +#define mb MemoryBarrier +#define rmb mb +#define wmb mb +#define wc_wmb mb + +#endif diff --git a/branches/ConnectX/hw/mlx4/user/mlx4.c b/branches/ConnectX/hw/mlx4/user/mlx4.c new file mode 100644 index 00000000..0787f90b --- /dev/null +++ b/branches/ConnectX/hw/mlx4/user/mlx4.c @@ -0,0 +1,321 @@ +/* + * Copyright (c) 2007 Cisco, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + + +#include "mlx4.h" +#include "mx_abi.h" + +#ifndef PCI_VENDOR_ID_MELLANOX +#define PCI_VENDOR_ID_MELLANOX 0x15b3 +#endif + +#define HCA(v, d) \ + {PCI_VENDOR_ID_##v, \ + d } + +struct { + unsigned vendor; + unsigned device; +} hca_table[] = { + HCA(MELLANOX, 0x6340), /* MT25408 "Hermon" SDR */ + HCA(MELLANOX, 0x634a), /* MT25408 "Hermon" DDR */ + HCA(MELLANOX, 0x6354), /* MT25408 "Hermon" QDR */ + HCA(MELLANOX, 0x6732), /* MT25408 "Hermon" DDR PCIe gen2 */ + HCA(MELLANOX, 0x673c), /* MT25408 "Hermon" QDR PCIe gen2 */ + HCA(MELLANOX, 0x0191), /* MT25408 "Hermon" livefish mode */ +}; + + +struct ibv_context * mlx4_alloc_context() +{ + struct mlx4_context *context; + + /* allocate context */ + context = cl_zalloc(sizeof *context); + if (!context) + goto end; + + context->qp_table_mutex = CreateMutex(NULL, FALSE, NULL); + if (!context->qp_table_mutex) + goto err_qp_mutex; + + context->xrc_srq_table_mutex = CreateMutex(NULL, FALSE, NULL); + if (!context->xrc_srq_table_mutex) + goto err_xrc_mutex; + + context->db_list_mutex = CreateMutex(NULL, FALSE, NULL); + if (!context->db_list_mutex) + goto err_db_mutex; + + context->ibv_ctx.mutex = CreateMutex(NULL, FALSE, NULL); + if (!context->ibv_ctx.mutex) + goto err_ctx_mutex; + + if (cl_spinlock_init(&context->uar_lock)) + goto err_uar_spinlock; + + if (cl_spinlock_init(&context->bf_lock)) + goto err_bf_spinlock; + + return &context->ibv_ctx; + +err_bf_spinlock: + cl_spinlock_destroy(&context->uar_lock); +err_uar_spinlock: + CloseHandle(context->ibv_ctx.mutex); +err_ctx_mutex: + CloseHandle(context->db_list_mutex); +err_db_mutex: + CloseHandle(context->xrc_srq_table_mutex); +err_xrc_mutex: + CloseHandle(context->qp_table_mutex); +err_qp_mutex: + cl_free(context); +end: + return NULL; + +} + +struct ibv_context * mlx4_fill_context(struct ibv_context *ctx, struct ibv_get_context_resp *p_resp) +{ + struct mlx4_context *context = to_mctx(ctx); + SYSTEM_INFO sys_info; + int i; + + /* check device type */ + for (i = 0; i < sizeof hca_table / sizeof hca_table[0]; ++i) + if (p_resp->vend_id == hca_table[i].vendor && + p_resp->dev_id == hca_table[i].device) + goto found; + goto err_dev_type; + +found: + context->num_qps = p_resp->qp_tab_size; + context->qp_table_shift = ffsl(context->num_qps) - 1 - MLX4_QP_TABLE_BITS; + context->qp_table_mask = (1 << context->qp_table_shift) - 1; + + for (i = 0; i < MLX4_QP_TABLE_SIZE; ++i) + context->qp_table[i].refcnt = 0; + + context->num_xrc_srqs = p_resp->qp_tab_size; + context->xrc_srq_table_shift = ffsl(context->num_xrc_srqs) - 1 + - MLX4_XRC_SRQ_TABLE_BITS; + context->xrc_srq_table_mask = (1 << context->xrc_srq_table_shift) - 1; + + for (i = 0; i < MLX4_XRC_SRQ_TABLE_SIZE; ++i) + context->xrc_srq_table[i].refcnt = 0; + + for (i = 0; i < MLX4_NUM_DB_TYPE; ++i) + context->db_list[i] = NULL; + + context->uar = (uint8_t *)(uintptr_t)p_resp->uar_addr; + context->bf_page = (uint8_t *)(uintptr_t)p_resp->bf_page; + context->bf_buf_size = p_resp->bf_buf_size; + context->bf_offset = p_resp->bf_offset; + + context->max_qp_wr = p_resp->max_qp_wr; + context->max_sge = p_resp->max_sge; + context->max_cqe = p_resp->max_cqe; + + GetSystemInfo(&sys_info); + context->ibv_ctx.page_size = sys_info.dwPageSize; + context->ibv_ctx.p_hca_attr = NULL; + + return &context->ibv_ctx; + +err_dev_type: + mlx4_free_context(&context->ibv_ctx); + return NULL; +} + +void mlx4_free_context(struct ibv_context *ctx) +{ + struct mlx4_context *context = to_mctx(ctx); + + cl_spinlock_destroy(&context->bf_lock); + 
cl_spinlock_destroy(&context->uar_lock); + CloseHandle(context->ibv_ctx.mutex); + CloseHandle(context->db_list_mutex); + CloseHandle(context->xrc_srq_table_mutex); + CloseHandle(context->qp_table_mutex); + if (context->ibv_ctx.p_hca_attr) + cl_free(context->ibv_ctx.p_hca_attr); + cl_free(context); +} + +__declspec(dllexport) ib_api_status_t +uvp_get_interface ( + IN OUT uvp_interface_t *p_uvp ) +{ + CL_ASSERT(p_uvp); + + /* + * Version of the header file this interface export can handle + */ + p_uvp->version = 0x101; + p_uvp->guid = 0x12345678; + + + /* + * CA Management + */ + p_uvp->pre_open_ca = mlx4_pre_open_ca; + p_uvp->post_open_ca = mlx4_post_open_ca; + p_uvp->pre_query_ca = mlx4_pre_query_ca; + p_uvp->post_query_ca = mlx4_post_query_ca; + p_uvp->pre_modify_ca = NULL; + p_uvp->post_modify_ca = NULL; + p_uvp->pre_close_ca = NULL; + p_uvp->post_close_ca = mlx4_post_close_ca; + + + /* + * Protection Domain + */ + p_uvp->pre_allocate_pd = mlx4_pre_alloc_pd; + p_uvp->post_allocate_pd = mlx4_post_alloc_pd; + p_uvp->pre_deallocate_pd = NULL; + p_uvp->post_deallocate_pd = mlx4_post_free_pd; + + + /* + * SRQ Management Verbs + */ + p_uvp->pre_create_srq = mlx4_pre_create_srq; + p_uvp->post_create_srq = mlx4_post_create_srq; + p_uvp->pre_query_srq = NULL; + p_uvp->post_query_srq = NULL; + p_uvp->pre_modify_srq = NULL; + p_uvp->post_modify_srq = NULL; + p_uvp->pre_destroy_srq = NULL; + p_uvp->post_destroy_srq = mlx4_post_destroy_srq; + + + /* + * QP Management Verbs + */ + p_uvp->pre_create_qp = mlx4_pre_create_qp; + p_uvp->post_create_qp = mlx4_post_create_qp; + p_uvp->pre_modify_qp = mlx4_pre_modify_qp; + p_uvp->post_modify_qp = mlx4_post_modify_qp; + p_uvp->pre_query_qp = NULL; + p_uvp->post_query_qp = mlx4_post_query_qp; + p_uvp->pre_destroy_qp = mlx4_pre_destroy_qp; + p_uvp->post_destroy_qp = mlx4_post_destroy_qp; + p_uvp->nd_modify_qp = mlx4_nd_modify_qp; + p_uvp->nd_get_qp_state = mlx4_nd_get_qp_state; + + + /* + * Completion Queue Management Verbs + */ + p_uvp->pre_create_cq = mlx4_pre_create_cq; + p_uvp->post_create_cq = mlx4_post_create_cq; + p_uvp->pre_query_cq = mlx4_pre_query_cq; + p_uvp->post_query_cq = NULL; + p_uvp->pre_resize_cq = NULL; + p_uvp->post_resize_cq = NULL; + p_uvp->pre_destroy_cq = NULL; + p_uvp->post_destroy_cq = mlx4_post_destroy_cq; + + + /* + * AV Management + */ + p_uvp->pre_create_av = mlx4_pre_create_ah; + p_uvp->post_create_av = NULL; + p_uvp->pre_query_av = mlx4_pre_query_ah; + p_uvp->post_query_av = mlx4_post_query_ah; + p_uvp->pre_modify_av = mlx4_pre_modify_ah; + p_uvp->post_modify_av = NULL; + p_uvp->pre_destroy_av = mlx4_pre_destroy_ah; + p_uvp->post_destroy_av = NULL; + + + /* + * Memory Region / Window Management Verbs + */ + p_uvp->pre_create_mw = NULL; + p_uvp->post_create_mw = NULL; + p_uvp->pre_query_mw = NULL; + p_uvp->post_query_mw = NULL; + p_uvp->pre_destroy_mw = NULL; + p_uvp->post_destroy_mw = NULL; + + + /* + * Multicast Support Verbs + */ + p_uvp->pre_attach_mcast = NULL; + p_uvp->post_attach_mcast = NULL; + p_uvp->pre_detach_mcast = NULL; + p_uvp->post_detach_mcast = NULL; + + + /* + * OS bypass (send, receive, poll/notify cq) + */ + p_uvp->post_send = mlx4_post_send; + p_uvp->post_recv = mlx4_post_recv; + p_uvp->post_srq_recv = mlx4_post_srq_recv; + p_uvp->poll_cq = mlx4_poll_cq; + p_uvp->rearm_cq = mlx4_arm_cq; + p_uvp->rearm_n_cq = NULL; /* __enable_ncomp_cq_notify: Not implemented */; + p_uvp->peek_cq = NULL; /* __peek_cq: Not implemented */ + p_uvp->bind_mw = NULL; /* __bind_mw: Not implemented */ + +#ifdef HAVE_IBV_XRC_OPS + /* + 
* XRC Management Verbs + */ + p_uvp->pre_create_xrc_srq = mlx4_pre_create_xrc_srq; + p_uvp->post_create_xrc_srq = mlx4_post_create_xrc_srq; + p_uvp->pre_open_xrc_domain = mlx4_pre_open_xrc_domain; + p_uvp->post_open_xrc_domain = mlx4_post_open_xrc_domain; + p_uvp->pre_close_xrc_domain = NULL; + p_uvp->post_close_xrc_domain = mlx4_post_close_xrc_domain; + p_uvp->pre_create_xrc_rcv_qp = NULL; + p_uvp->post_create_xrc_rcv_qp = NULL; + p_uvp->pre_modify_xrc_rcv_qp = NULL; + p_uvp->post_modify_xrc_rcv_qp = NULL; + p_uvp->pre_query_xrc_rcv_qp = NULL; + p_uvp->post_query_xrc_rcv_qp = NULL; + p_uvp->pre_reg_xrc_rcv_qp = NULL; + p_uvp->post_reg_xrc_rcv_qp = NULL; + p_uvp->pre_unreg_xrc_rcv_qp = NULL; + p_uvp->post_unreg_xrc_rcv_qp = NULL; +#endif + + return IB_SUCCESS; +} + + diff --git a/branches/ConnectX/hw/mlx4/user/mlx4.def b/branches/ConnectX/hw/mlx4/user/mlx4.def new file mode 100644 index 00000000..5ff6487b --- /dev/null +++ b/branches/ConnectX/hw/mlx4/user/mlx4.def @@ -0,0 +1,6 @@ +#if DEBUG +LIBRARY mlx4ud.dll +#else +LIBRARY mlx4u.dll +#endif + diff --git a/branches/ConnectX/hw/mlx4/user/mlx4.h b/branches/ConnectX/hw/mlx4/user/mlx4.h new file mode 100644 index 00000000..a074fef1 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/user/mlx4.h @@ -0,0 +1,342 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX4_H +#define MLX4_H + + +#include +#include + +#include "verbs.h" +#include "mx_abi.h" + +#include "l2w.h" + +#define PFX "mlx4: " + +#ifndef max +#define max(a,b) \ + ({ typeof (a) _a = (a); \ + typeof (b) _b = (b); \ + _a > _b ? _a : _b; }) +#endif + +#ifndef min +#define min(a,b) \ + ({ typeof (a) _a = (a); \ + typeof (b) _b = (b); \ + _a < _b ? 
_a : _b; }) +#endif + +enum { + MLX4_CQ_ENTRY_SIZE = 0x20 +}; + +enum { + MLX4_STAT_RATE_OFFSET = 5 +}; + +enum { + MLX4_QP_TABLE_BITS = 8, + MLX4_QP_TABLE_SIZE = 1 << MLX4_QP_TABLE_BITS, + MLX4_QP_TABLE_MASK = MLX4_QP_TABLE_SIZE - 1 +}; + +enum { + MLX4_XRC_SRQ_TABLE_BITS = 8, + MLX4_XRC_SRQ_TABLE_SIZE = 1 << MLX4_XRC_SRQ_TABLE_BITS, + MLX4_XRC_SRQ_TABLE_MASK = MLX4_XRC_SRQ_TABLE_SIZE - 1 +}; + +enum { + MLX4_XRC_QPN_BIT = (1 << 23) +}; + +enum mlx4_db_type { + MLX4_DB_TYPE_CQ, + MLX4_DB_TYPE_RQ, + MLX4_NUM_DB_TYPE +}; + +enum mlx4_opcode_type { + MLX4_OPCODE_NOP = 0x00, + MLX4_OPCODE_SEND_INVAL = 0x01, + MLX4_OPCODE_RDMA_WRITE = 0x08, + MLX4_OPCODE_RDMA_WRITE_IMM = 0x09, + MLX4_OPCODE_SEND = 0x0a, + MLX4_OPCODE_SEND_IMM = 0x0b, + MLX4_OPCODE_LSO = 0x0e, + MLX4_OPCODE_RDMA_READ = 0x10, + MLX4_OPCODE_ATOMIC_CS = 0x11, + MLX4_OPCODE_ATOMIC_FA = 0x12, + MLX4_OPCODE_ATOMIC_MASK_CS = 0x14, + MLX4_OPCODE_ATOMIC_MASK_FA = 0x15, + MLX4_OPCODE_BIND_MW = 0x18, + MLX4_OPCODE_FMR = 0x19, + MLX4_OPCODE_LOCAL_INVAL = 0x1b, + MLX4_OPCODE_CONFIG_CMD = 0x1f, + + MLX4_RECV_OPCODE_RDMA_WRITE_IMM = 0x00, + MLX4_RECV_OPCODE_SEND = 0x01, + MLX4_RECV_OPCODE_SEND_IMM = 0x02, + MLX4_RECV_OPCODE_SEND_INVAL = 0x03, + + MLX4_CQE_OPCODE_ERROR = 0x1e, + MLX4_CQE_OPCODE_RESIZE = 0x16, + + MLX4_OPCODE_INVALID = 0xff +}; + +struct mlx4_db_page; + +struct mlx4_context { + struct ibv_context ibv_ctx; + + uint8_t *uar; + pthread_spinlock_t uar_lock; + + uint8_t *bf_page; + int bf_buf_size; + int bf_offset; + pthread_spinlock_t bf_lock; + + struct { + struct mlx4_qp **table; + int refcnt; + } qp_table[MLX4_QP_TABLE_SIZE]; + pthread_mutex_t qp_table_mutex; + int num_qps; + int qp_table_shift; + int qp_table_mask; + int max_qp_wr; + int max_sge; + int max_cqe; + + struct { + struct mlx4_srq **table; + int refcnt; + } xrc_srq_table[MLX4_XRC_SRQ_TABLE_SIZE]; + pthread_mutex_t xrc_srq_table_mutex; + int num_xrc_srqs; + int xrc_srq_table_shift; + int xrc_srq_table_mask; + + struct mlx4_db_page *db_list[MLX4_NUM_DB_TYPE]; + pthread_mutex_t db_list_mutex; +}; + +struct mlx4_buf { + uint8_t *buf; + int length; +}; + +struct mlx4_pd { + struct ibv_pd ibv_pd; + uint32_t pdn; +}; + +struct mlx4_cq { + struct ibv_cq ibv_cq; + struct mlx4_buf buf; + pthread_spinlock_t lock; + uint32_t cqn; + uint32_t cons_index; + uint32_t *set_ci_db; + uint32_t *arm_db; + int arm_sn; +}; + +struct mlx4_srq { + struct ibv_srq ibv_srq; + struct mlx4_buf buf; + pthread_spinlock_t lock; + uint64_t *wrid; + uint32_t srqn; + int max; + int max_gs; + int wqe_shift; + int head; + int tail; + uint32_t *db; + uint16_t counter; +}; + +struct mlx4_wq { + uint64_t *wrid; + pthread_spinlock_t lock; + int wqe_cnt; + int max_post; + unsigned head; + unsigned tail; + int max_gs; + int wqe_shift; + int offset; +}; + +struct mlx4_qp { + struct ibv_qp ibv_qp; + struct mlx4_buf buf; + int max_inline_data; + int buf_size; + + uint32_t doorbell_qpn; + uint32_t sq_signal_bits; + int sq_spare_wqes; + struct mlx4_wq sq; + + uint32_t *db; + struct mlx4_wq rq; +}; + +struct mlx4_av { + uint32_t port_pd; + uint8_t reserved1; + uint8_t g_slid; + uint16_t dlid; + uint8_t reserved2; + uint8_t gid_index; + uint8_t stat_rate; + uint8_t hop_limit; + uint32_t sl_tclass_flowlabel; + uint8_t dgid[16]; +}; + +struct mlx4_ah { + struct ibv_ah ibv_ah; + struct mlx4_av av; +}; + +struct mlx4_xrc_domain { + struct ibv_xrc_domain ibv_xrcd; + uint32_t xrcdn; +}; + +static inline unsigned long align(unsigned long val, unsigned long align) +{ + return (val + align - 1) & ~(align - 1); +} + +#define 
to_mxxx(xxx, type) \ + ((struct mlx4_##type *) \ + ((uint8_t *) ib##xxx - offsetof(struct mlx4_##type, ibv_##xxx))) + +static inline struct mlx4_context *to_mctx(struct ibv_context *ibctx) +{ + return to_mxxx(ctx, context); +} + +static inline struct mlx4_pd *to_mpd(struct ibv_pd *ibpd) +{ + return to_mxxx(pd, pd); +} + +static inline struct mlx4_cq *to_mcq(struct ibv_cq *ibcq) +{ + return to_mxxx(cq, cq); +} + +static inline struct mlx4_srq *to_msrq(struct ibv_srq *ibsrq) +{ + return to_mxxx(srq, srq); +} + +static inline struct mlx4_qp *to_mqp(struct ibv_qp *ibqp) +{ + return to_mxxx(qp, qp); +} + +static inline struct mlx4_ah *to_mah(struct ibv_ah *ibah) +{ + return to_mxxx(ah, ah); +} + +#ifdef HAVE_IBV_XRC_OPS +static inline struct mlx4_xrc_domain *to_mxrcd(struct ibv_xrc_domain *ibxrcd) +{ + return to_mxxx(xrcd, xrc_domain); +} +#endif + +struct ibv_context * mlx4_alloc_context(); +struct ibv_context * mlx4_fill_context(struct ibv_context *ctx, + struct ibv_get_context_resp *resp_p); +void mlx4_free_context(struct ibv_context *ctx); + +int mlx4_alloc_buf(struct mlx4_buf *buf, int size, int page_size); +void mlx4_free_buf(struct mlx4_buf *buf); + +uint32_t *mlx4_alloc_db(struct mlx4_context *context, enum mlx4_db_type type); +void mlx4_free_db(struct mlx4_context *context, enum mlx4_db_type type, uint32_t *db); + +ib_api_status_t mlx4_poll_cq(const void *h_cq, + ib_wc_t** const pp_free_wclist, + ib_wc_t** const pp_done_wclist); +ib_api_status_t mlx4_arm_cq(const void *h_cq, const boolean_t solicited); +void mlx4_cq_event(struct ibv_cq *cq); +void mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, + struct mlx4_srq *srq); +void mlx4_cq_resize_copy_cqes(struct mlx4_cq *cq, void *buf, int new_cqe); + +int mlx4_alloc_srq_buf(struct ibv_pd *pd, struct ibv_srq_attr *attr, + struct mlx4_srq *srq); +void mlx4_free_srq_wqe(struct mlx4_srq *srq, int ind); +ib_api_status_t mlx4_post_srq_recv(const void *h_srq, + ib_recv_wr_t* const wr, + ib_recv_wr_t** bad_wr); + +struct mlx4_srq *mlx4_find_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn); +int mlx4_store_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn, + struct mlx4_srq *srq); +void mlx4_clear_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn); + +void mlx4_init_qp_indices(struct mlx4_qp *qp); +void mlx4_qp_init_sq_ownership(struct mlx4_qp *qp); +ib_api_status_t mlx4_post_send(const void *h_qp, + ib_send_wr_t* const wr, + ib_send_wr_t** bad_wr); +ib_api_status_t mlx4_post_recv(const void *h_qp, + ib_recv_wr_t* const wr, + ib_recv_wr_t** bad_wr); + +void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type, + struct mlx4_qp *qp); +int mlx4_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap, + enum ibv_qp_type type, struct mlx4_qp *qp); +void mlx4_set_sq_sizes(struct mlx4_qp *qp, struct ibv_qp_cap *cap, + enum ibv_qp_type type); +struct mlx4_qp *mlx4_find_qp(struct mlx4_context *ctx, uint32_t qpn); +int mlx4_store_qp(struct mlx4_context *ctx, uint32_t qpn, struct mlx4_qp *qp); +void mlx4_clear_qp(struct mlx4_context *ctx, uint32_t qpn); + +#endif /* MLX4_H */ diff --git a/branches/ConnectX/hw/mlx4/user/mlx4_debug.c b/branches/ConnectX/hw/mlx4/user/mlx4_debug.c new file mode 100644 index 00000000..3985f753 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/user/mlx4_debug.c @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2005 Mellanox Technologies LTD. All rights reserved. 
+ * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +// Author: Yossi Leybovich + +#include "mlx4_debug.h" +#include +#include +#include + +#if !defined(EVENT_TRACING) + + +#if DBG +uint32_t g_mlx4_dbg_level = TRACE_LEVEL_WARNING; +uint32_t g_mlx4_dbg_flags= MLX4_DBG_QP | MLX4_DBG_CQ | MLX4_DBG_MEMORY; +#endif + +VOID +_MLX4_PRINT( + IN char* msg, + ... + ) + + { +#if DBG +#define TEMP_BUFFER_SIZE 1024 + va_list list; + UCHAR debugMessageBuffer[TEMP_BUFFER_SIZE]; + HRESULT result; + + va_start(list, msg); + + if (msg) { + + // + // Using new safe string functions instead of _vsnprintf. This function takes + // care of NULL terminating if the message is longer than the buffer. + // + + result = StringCbVPrintfA (debugMessageBuffer, sizeof(debugMessageBuffer), + msg, list); + if(((HRESULT)(result) < 0)) { + + OutputDebugString (": StringCbVPrintfA failed \n"); + return; + } + OutputDebugString ( debugMessageBuffer); + + } + va_end(list); + + return; +#else + UNUSED_PARAM(msg); +#endif //DBG +} + +#endif //EVENT_TRACING + diff --git a/branches/ConnectX/hw/mlx4/user/mlx4_debug.h b/branches/ConnectX/hw/mlx4/user/mlx4_debug.h new file mode 100644 index 00000000..a41a5f3d --- /dev/null +++ b/branches/ConnectX/hw/mlx4/user/mlx4_debug.h @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + + +#ifndef _MLX4_DEBUG_H_ +#define _MLX4_DEBUG_H_ + +#include + +extern uint32_t g_mlx4_dbg_level; +extern uint32_t g_mlx4_dbg_flags; + + +#if defined(EVENT_TRACING) +// +// Software Tracing Definitions +// +// + +#define WPP_CONTROL_GUIDS \ + WPP_DEFINE_CONTROL_GUID(HCACtlGuid,(2C718E52,0D36,4bda,9E58,0FC601818D8F), \ + WPP_DEFINE_BIT( MLX4_DBG_DEV) \ + WPP_DEFINE_BIT( MLX4_DBG_PNP) \ + WPP_DEFINE_BIT( MLX4_DBG_MAD) \ + WPP_DEFINE_BIT( MLX4_DBG_PO) \ + WPP_DEFINE_BIT( MLX4_DBG_CQ) \ + WPP_DEFINE_BIT( MLX4_DBG_QP) \ + WPP_DEFINE_BIT( MLX4_DBG_MEMORY) \ + WPP_DEFINE_BIT( MLX4_DBG_SRQ) \ + WPP_DEFINE_BIT( MLX4_DBG_AV) \ + WPP_DEFINE_BIT( MLX4_DBG_SEND) \ + WPP_DEFINE_BIT( MLX4_DBG_RECV) \ + WPP_DEFINE_BIT( MLX4_DBG_LOW)) + + +#define WPP_LEVEL_FLAGS_ENABLED(lvl, flags) (WPP_LEVEL_ENABLED(flags) && WPP_CONTROL(WPP_BIT_ ## flags).Level >= lvl) +#define WPP_LEVEL_FLAGS_LOGGER(lvl,flags) WPP_LEVEL_LOGGER(flags) +#define WPP_FLAG_ENABLED(flags)(WPP_LEVEL_ENABLED(flags) && WPP_CONTROL(WPP_BIT_ ## flags).Level >= TRACE_LEVEL_VERBOSE) +#define WPP_FLAG_LOGGER(flags) WPP_LEVEL_LOGGER(flags) + + +// begin_wpp config +// MLX4_ENTER(FLAG); +// MLX4_EXIT(FLAG); +// USEPREFIX(MLX4_PRINT, "%!FUNC!() "); +// USESUFFIX(MLX4_ENTER, "%!FUNC!===>"); +// USESUFFIX(MLX4_EXIT, "%!FUNC!<==="); +// end_wpp + + +#else + +#include +#include + +/* + * Debug macros + */ + + +#define MLX4_DBG_DEV (1 << 0) +#define MLX4_DBG_PNP (1 << 1) +#define MLX4_DBG_MAD (1 << 2) +#define MLX4_DBG_PO (1 << 3) +#define MLX4_DBG_QP (1 << 4) +#define MLX4_DBG_CQ (1 << 5) +#define MLX4_DBG_MEMORY (1 << 6) +#define MLX4_DBG_SRQ (1 << 7) +#define MLX4_DBG_AV (1 << 8) +#define MLX4_DBG_SEND (1 << 9) +#define MLX4_DBG_RECV (1 << 10) +#define MLX4_DBG_LOW (1 << 11) + + +VOID + _MLX4_PRINT( + IN char* msg, + ...); + +#if DBG + +#define MLX4_PRINT(_level_,_flags_,_msg_) \ + if ((_level_) <= g_mlx4_dbg_level && (_flags_) & g_mlx4_dbg_flags) {\ + _MLX4_PRINT("[MLX4] %s():",__FUNCTION__);\ + if((_level_) == TRACE_LEVEL_ERROR) _MLX4_PRINT ("***ERROR*** ");\ + _MLX4_PRINT _msg_ ; \ + } + + +// +#else + +#define MLX4_PRINT(lvl ,flags, msg) + +#endif + + +#define MLX4_ENTER(flags)\ + MLX4_PRINT(TRACE_LEVEL_VERBOSE, flags,("===>\n")); + +#define MLX4_EXIT(flags)\ + MLX4_PRINT(TRACE_LEVEL_VERBOSE, flags,("<===\n")); + +#define MLX4_PRINT_EXIT(_level_,_flag_,_msg_) \ + {\ + if (status != IB_SUCCESS) {\ + MLX4_PRINT(_level_,_flag_,_msg_);\ + }\ + MLX4_EXIT(_flag_);\ + } + +#endif //EVENT_TRACING + +#endif /*_MLNX_MLX4_DEBUG_H_ */ + diff --git a/branches/ConnectX/hw/mlx4/user/qp.c b/branches/ConnectX/hw/mlx4/user/qp.c new file mode 100644 index 00000000..61e8d1eb --- /dev/null +++ b/branches/ConnectX/hw/mlx4/user/qp.c @@ -0,0 +1,755 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2007 Cisco, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "mlx4.h" +#include "doorbell.h" +#include "wqe.h" +#include "mlx4_debug.h" + +#if defined(EVENT_TRACING) +#include "qp.tmh" +#endif + +static enum mlx4_opcode_type __to_opcode(ib_send_wr_t *wr) +{ + + enum mlx4_opcode_type opcode = MLX4_OPCODE_INVALID; + + switch (wr->wr_type) { + case WR_SEND: + opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? + MLX4_OPCODE_SEND_IMM : MLX4_OPCODE_SEND; + break; + case WR_RDMA_WRITE: + opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? + MLX4_OPCODE_RDMA_WRITE_IMM : MLX4_OPCODE_RDMA_WRITE; + break; + case WR_RDMA_READ: + opcode = MLX4_OPCODE_RDMA_READ; + break; + case WR_COMPARE_SWAP: + opcode = MLX4_OPCODE_ATOMIC_CS; + break; + case WR_FETCH_ADD: + opcode = MLX4_OPCODE_ATOMIC_FA; + break; + default: + opcode = MLX4_OPCODE_INVALID; + break; + } + + return opcode; +} + +static void *get_recv_wqe(struct mlx4_qp *qp, int n) +{ + return qp->buf.buf + qp->rq.offset + (n << qp->rq.wqe_shift); +} + +static void *get_send_wqe(struct mlx4_qp *qp, int n) +{ + return qp->buf.buf + qp->sq.offset + (n << qp->sq.wqe_shift); +} + +/* + * Stamp a SQ WQE so that it is invalid if prefetched by marking the + * first four bytes of every 64 byte chunk with 0xffffffff, except for + * the very first chunk of the WQE. 
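+ * WQEs are stamped when send queue ownership is initialized and again as
+ * entries are reused in mlx4_post_send(), so a WQE that has not yet been
+ * posted reads as invalid if it is prefetched.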
+ */ +static void stamp_send_wqe(struct mlx4_qp *qp, int n) +{ + uint32_t *wqe = get_send_wqe(qp, n); + int i; + + for (i = 16; i < 1 << (qp->sq.wqe_shift - 2); i += 16) + wqe[i] = 0xffffffff; +} + +void mlx4_init_qp_indices(struct mlx4_qp *qp) +{ + qp->sq.head = 0; + qp->sq.tail = 0; + qp->rq.head = 0; + qp->rq.tail = 0; +} + +void mlx4_qp_init_sq_ownership(struct mlx4_qp *qp) +{ + struct mlx4_wqe_ctrl_seg *ctrl; + int i; + + for (i = 0; i < qp->sq.wqe_cnt; ++i) { + ctrl = get_send_wqe(qp, i); + ctrl->owner_opcode = htonl((uint32_t)1 << 31); + + stamp_send_wqe(qp, i); + } +} + +static int wq_overflow(struct mlx4_wq *wq, int nreq, struct mlx4_cq *cq) +{ + int cur; + + cur = wq->head - wq->tail; + if (cur + nreq < wq->max_post) + return 0; + + pthread_spin_lock(&cq->lock); + cur = wq->head - wq->tail; + pthread_spin_unlock(&cq->lock); + + return cur + nreq >= wq->max_post; +} + +static inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg, + uint64_t remote_addr, uint32_t rkey) +{ + rseg->raddr = cl_hton64(remote_addr); + rseg->rkey = rkey; + rseg->reserved = 0; +} + +static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, ib_send_wr_t *wr) +{ + if (wr->wr_type == WR_COMPARE_SWAP) { + aseg->swap_add = wr->remote_ops.atomic2; + aseg->compare = wr->remote_ops.atomic1; + } else { + aseg->swap_add = wr->remote_ops.atomic1; + aseg->compare = 0; + } +} + +static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, ib_send_wr_t *wr) +{ + memcpy(dseg->av, &to_mah((struct ibv_ah *)wr->dgrm.ud.h_av)->av, sizeof (struct mlx4_av)); + dseg->dqpn = wr->dgrm.ud.remote_qp; + dseg->qkey = wr->dgrm.ud.remote_qkey; +} + +static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, ib_local_ds_t *ds) +{ + dseg->byte_count = cl_hton32(ds->length); + dseg->lkey = cl_hton32(ds->lkey); + dseg->addr = cl_hton64(ds->vaddr); +} + +static void set_data_seg(struct mlx4_wqe_data_seg *dseg, ib_local_ds_t *ds) +{ + dseg->lkey = cl_hton32(ds->lkey); + dseg->addr = cl_hton64(ds->vaddr); + + /* + * Need a barrier here before writing the byte_count field to + * make sure that all the data is visible before the + * byte_count field is set. Otherwise, if the segment begins + * a new cacheline, the HCA prefetcher could grab the 64-byte + * chunk and get a valid (!= * 0xffffffff) byte count but + * stale data, and end up sending the wrong data. + */ + wmb(); + + dseg->byte_count = cl_hton32(ds->length); +} + +/* + * Avoid using memcpy() to copy to BlueFlame page, since memcpy() + * implementations may use move-string-buffer assembler instructions, + * which do not guarantee order of copying. 
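+ * The loop below copies with 64-bit stores on Win64 (pointer-sized stores
+ * otherwise), two per iteration.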
+ */ +static void mlx4_bf_copy(unsigned long *dst, unsigned long *src, unsigned bytecnt) +{ +#ifdef _WIN64 + uint64_t *d = (uint64_t *)dst; + uint64_t *s = (uint64_t *)src; + + while (bytecnt > 0) { + *d++ = *s++; + *d++ = *s++; + bytecnt -= 2 * sizeof (uint64_t); + } +#else + while (bytecnt > 0) { + *dst++ = *src++; + *dst++ = *src++; + bytecnt -= 2 * sizeof (unsigned long); + } +#endif +} + +ib_api_status_t +mlx4_post_send( + IN const void* h_qp, + IN ib_send_wr_t* const p_wr, + OUT ib_send_wr_t** bad_wr) +{ + struct ibv_qp *ibqp = (struct ibv_qp *)h_qp; + struct mlx4_qp *qp = to_mqp(ibqp); + struct mlx4_context *ctx; + uint8_t *wqe; + struct mlx4_wqe_ctrl_seg *ctrl = NULL; + enum mlx4_opcode_type opcode; + int ind; + int nreq; + int inl = 0; + ib_api_status_t status = IB_SUCCESS; + ib_send_wr_t *wr = p_wr; + int size = 0; + uint32_t i; + + pthread_spin_lock(&qp->sq.lock); + + /* XXX check that state is OK to post send */ + + ind = qp->sq.head; + + for (nreq = 0; wr; ++nreq, wr = wr->p_next) { + if (wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) { + status = IB_INSUFFICIENT_RESOURCES; + *bad_wr = wr; + goto out; + } + + if (wr->num_ds > (uint32_t)qp->sq.max_gs) { + status = IB_INVALID_MAX_SGE; + *bad_wr = wr; + goto out; + } + + opcode = __to_opcode(wr); + if (opcode == MLX4_OPCODE_INVALID) { + status = IB_INVALID_WR_TYPE; + *bad_wr = wr; + goto out; + } + + wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1)); + ctrl = (struct mlx4_wqe_ctrl_seg *)wqe; + qp->sq.wrid[ind & (qp->sq.wqe_cnt - 1)] = wr->wr_id; + + ctrl->xrcrb_flags = + (wr->send_opt & IB_SEND_OPT_SIGNALED ? + cl_hton32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) | + (wr->send_opt & IB_SEND_OPT_SOLICITED ? + cl_hton32(MLX4_WQE_CTRL_SOLICIT) : 0) | + qp->sq_signal_bits; + + if (opcode == MLX4_OPCODE_SEND_IMM || + opcode == MLX4_OPCODE_RDMA_WRITE_IMM) + ctrl->imm = wr->immediate_data; + else + ctrl->imm = 0; + + wqe += sizeof *ctrl; + size = sizeof *ctrl / 16; + + switch (ibqp->qp_type) { + case IBV_QPT_XRC: + //ctrl->xrcrb_flags |= cl_hton32(wr->xrc_remote_srq_num << 8); + /* fall thru */ + case IBV_QPT_RC: + case IBV_QPT_UC: + switch (opcode) { + case MLX4_OPCODE_ATOMIC_CS: + case MLX4_OPCODE_ATOMIC_FA: + set_raddr_seg((struct mlx4_wqe_raddr_seg *)wqe, wr->remote_ops.vaddr, + wr->remote_ops.rkey); + wqe += sizeof (struct mlx4_wqe_raddr_seg); + + set_atomic_seg((struct mlx4_wqe_atomic_seg *)wqe, wr); + wqe += sizeof (struct mlx4_wqe_atomic_seg); + size += (sizeof (struct mlx4_wqe_raddr_seg) + + sizeof (struct mlx4_wqe_atomic_seg)) / 16; + + break; + + case MLX4_OPCODE_RDMA_READ: + inl = 1; + /* fall through */ + case MLX4_OPCODE_RDMA_WRITE: + case MLX4_OPCODE_RDMA_WRITE_IMM: + set_raddr_seg((struct mlx4_wqe_raddr_seg *)wqe, wr->remote_ops.vaddr, + wr->remote_ops.rkey); + wqe += sizeof (struct mlx4_wqe_raddr_seg); + size += sizeof (struct mlx4_wqe_raddr_seg) / 16; + + break; + + default: + /* No extra segments required for sends */ + break; + } + break; + + case IBV_QPT_UD: + set_datagram_seg((struct mlx4_wqe_datagram_seg *)wqe, wr); + wqe += sizeof (struct mlx4_wqe_datagram_seg); + size += sizeof (struct mlx4_wqe_datagram_seg) / 16; + break; + + default: + break; + } + + if (wr->send_opt & IB_SEND_OPT_INLINE && wr->num_ds) { + struct mlx4_wqe_inline_seg *seg; + uint8_t *addr; + int len, seg_len; + int num_seg; + int off, to_copy; + + inl = 0; + + seg = (struct mlx4_wqe_inline_seg *)wqe; + wqe += sizeof *seg; + off = (int)(((uintptr_t) wqe) & (MLX4_INLINE_ALIGN - 1)); + num_seg = 0; + seg_len = 0; + + for (i = 0; i < wr->num_ds; ++i) 
{ + addr = (uint8_t *)(uintptr_t)wr->ds_array[i].vaddr; + len = wr->ds_array[i].length; + inl += len; + + if (inl > qp->max_inline_data) { + inl = 0; + status = IB_INVALID_PARAMETER; + *bad_wr = wr; + goto out; + } + + while (len >= MLX4_INLINE_ALIGN - off) { + to_copy = MLX4_INLINE_ALIGN - off; + memcpy(wqe, addr, to_copy); + len -= to_copy; + wqe += to_copy; + addr += to_copy; + seg_len += to_copy; + wmb(); /* see comment below */ + seg->byte_count = htonl(MLX4_INLINE_SEG | seg_len); + seg_len = 0; + seg = (struct mlx4_wqe_inline_seg *)wqe; + wqe += sizeof *seg; + off = sizeof *seg; + ++num_seg; + } + + memcpy(wqe, addr, len); + wqe += len; + seg_len += len; + off += len; + } + + if (seg_len) { + ++num_seg; + /* + * Need a barrier here to make sure + * all the data is visible before the + * byte_count field is set. Otherwise + * the HCA prefetcher could grab the + * 64-byte chunk with this inline + * segment and get a valid (!= + * 0xffffffff) byte count but stale + * data, and end up sending the wrong + * data. + */ + wmb(); + seg->byte_count = htonl(MLX4_INLINE_SEG | seg_len); + } + + size += (inl + num_seg * sizeof * seg + 15) / 16; + } else { + struct mlx4_wqe_data_seg *seg = (struct mlx4_wqe_data_seg *)wqe; + + for (i = wr->num_ds; i > 0; --i) + set_data_seg(seg + i - 1, wr->ds_array + i - 1); + + size += wr->num_ds * (sizeof *seg / 16); + } + + ctrl->fence_size = (uint8_t)((wr->send_opt & IB_SEND_OPT_FENCE ? + MLX4_WQE_CTRL_FENCE : 0) | size); + + /* + * Make sure descriptor is fully written before + * setting ownership bit (because HW can start + * executing as soon as we do). + */ + wmb(); + + ctrl->owner_opcode = htonl(opcode) | + (ind & qp->sq.wqe_cnt ? htonl((uint32_t)1 << 31) : 0); + + /* + * We can improve latency by not stamping the last + * send queue WQE until after ringing the doorbell, so + * only stamp here if there are still more WQEs to post. + */ + if (wr->p_next) + stamp_send_wqe(qp, (ind + qp->sq_spare_wqes) & + (qp->sq.wqe_cnt - 1)); + + ++ind; + + MLX4_PRINT( TRACE_LEVEL_INFORMATION, MLX4_DBG_QP, ("qpn %#x, wr_id %#I64x, ix %d, \n", + qp->ibv_qp.qp_num, wr->wr_id, ind - 1)); + } + +out: + ctx = to_mctx(ibqp->context); + + if (nreq == 1 && inl && size > 1 && size < ctx->bf_buf_size / 16) { + ctrl->owner_opcode |= htonl((qp->sq.head & 0xffff) << 8); + *(uint32_t *) ctrl->reserved |= qp->doorbell_qpn; + /* + * Make sure that descriptor is written to memory + * before writing to BlueFlame page. + */ + wmb(); + + ++qp->sq.head; + + pthread_spin_lock(&ctx->bf_lock); + + mlx4_bf_copy((unsigned long *) (ctx->bf_page + ctx->bf_offset), + (unsigned long *) ctrl, align(size * 16, 64)); + + wc_wmb(); + + ctx->bf_offset ^= ctx->bf_buf_size; + + pthread_spin_unlock(&ctx->bf_lock); + }else if (nreq) { + qp->sq.head += nreq; + + /* + * Make sure that descriptors are written before + * doorbell record. 
+ */ + wmb(); + + *(uint32_t *) (ctx->uar + MLX4_SEND_DOORBELL) = qp->doorbell_qpn; + } + + if (nreq) + stamp_send_wqe(qp, (ind + qp->sq_spare_wqes - 1) & + (qp->sq.wqe_cnt - 1)); + + pthread_spin_unlock(&qp->sq.lock); + + return status; +} + + +ib_api_status_t +mlx4_post_recv( + IN const void* h_qp, + IN ib_recv_wr_t* const p_wr, + OUT ib_recv_wr_t** bad_wr) +{ + struct mlx4_qp *qp = to_mqp((struct ibv_qp *)h_qp); + struct mlx4_wqe_data_seg *scat; + ib_api_status_t status = IB_SUCCESS; + ib_recv_wr_t *wr = p_wr; + int nreq; + int ind; + uint32_t i; + + pthread_spin_lock(&qp->rq.lock); + + /* XXX check that state is OK to post receive */ + + ind = qp->rq.head & (qp->rq.wqe_cnt - 1); + + for (nreq = 0; wr; ++nreq, wr = wr->p_next) { + if (wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) { + status = IB_INSUFFICIENT_RESOURCES; + *bad_wr = wr; + goto out; + } + + if (wr->num_ds > (uint32_t)qp->rq.max_gs) { + status = IB_INVALID_MAX_SGE; + *bad_wr = wr; + goto out; + } + + scat = get_recv_wqe(qp, ind); + + for (i = 0; i < wr->num_ds; ++i) + __set_data_seg(scat + i, wr->ds_array + i); + + if (i < (uint32_t)qp->rq.max_gs) { + scat[i].byte_count = 0; + scat[i].lkey = htonl(MLX4_INVALID_LKEY); + scat[i].addr = 0; + } + + qp->rq.wrid[ind] = wr->wr_id; + + ind = (ind + 1) & (qp->rq.wqe_cnt - 1); + + MLX4_PRINT( TRACE_LEVEL_INFORMATION, MLX4_DBG_QP, ("qpn %#x, wr_id %#I64x, ix %d, \n", + qp->ibv_qp.qp_num, wr->wr_id, ind - 1)); + } + +out: + if (nreq) { + qp->rq.head += nreq; + + /* + * Make sure that descriptors are written before + * doorbell record. + */ + wmb(); + + *qp->db = htonl(qp->rq.head & 0xffff); + } + + pthread_spin_unlock(&qp->rq.lock); + + return status; +} + +static int num_inline_segs(int data, enum ibv_qp_type type) +{ + /* + * Inline data segments are not allowed to cross 64 byte + * boundaries. For UD QPs, the data segments always start + * aligned to 64 bytes (16 byte control segment + 48 byte + * datagram segment); for other QPs, there will be a 16 byte + * control segment and possibly a 16 byte remote address + * segment, so in the worst case there will be only 32 bytes + * available for the first data segment. + */ + if (type == IBV_QPT_UD) + data += (sizeof (struct mlx4_wqe_ctrl_seg) + + sizeof (struct mlx4_wqe_datagram_seg)) % + MLX4_INLINE_ALIGN; + else + data += (sizeof (struct mlx4_wqe_ctrl_seg) + + sizeof (struct mlx4_wqe_raddr_seg)) % + MLX4_INLINE_ALIGN; + + return (int)(data + MLX4_INLINE_ALIGN - sizeof (struct mlx4_wqe_inline_seg) - 1) / + (MLX4_INLINE_ALIGN - sizeof (struct mlx4_wqe_inline_seg)); +} + +void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type, + struct mlx4_qp *qp) +{ + int size; + unsigned max_sq_sge; + + max_sq_sge = align(cap->max_inline_data + + num_inline_segs(cap->max_inline_data, type) * + sizeof (struct mlx4_wqe_inline_seg), + sizeof (struct mlx4_wqe_data_seg)) / + sizeof (struct mlx4_wqe_data_seg); + if (max_sq_sge < cap->max_send_sge) + max_sq_sge = cap->max_send_sge; + + size = max_sq_sge * sizeof (struct mlx4_wqe_data_seg); + switch (type) { + case IBV_QPT_UD: + size += sizeof (struct mlx4_wqe_datagram_seg); + break; + + case IBV_QPT_UC: + size += sizeof (struct mlx4_wqe_raddr_seg); + break; + + case IBV_QPT_XRC: + case IBV_QPT_RC: + size += sizeof (struct mlx4_wqe_raddr_seg); + /* + * An atomic op will require an atomic segment, a + * remote address segment and one scatter entry. 
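+		 * (In this layout each of those segments is 16 bytes, so the
+		 * floor here is 48 bytes before the 16-byte control segment
+		 * is added below.)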
+ */ + if (size < (sizeof (struct mlx4_wqe_atomic_seg) + + sizeof (struct mlx4_wqe_raddr_seg) + + sizeof (struct mlx4_wqe_data_seg))) + size = (sizeof (struct mlx4_wqe_atomic_seg) + + sizeof (struct mlx4_wqe_raddr_seg) + + sizeof (struct mlx4_wqe_data_seg)); + break; + + default: + break; + } + + /* Make sure that we have enough space for a bind request */ + if (size < sizeof (struct mlx4_wqe_bind_seg)) + size = sizeof (struct mlx4_wqe_bind_seg); + + size += sizeof (struct mlx4_wqe_ctrl_seg); + + for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size; + qp->sq.wqe_shift++) + ; /* nothing */ +} + +int mlx4_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap, + enum ibv_qp_type type, struct mlx4_qp *qp) +{ + UNREFERENCED_PARAMETER(type); + + qp->rq.max_gs = cap->max_recv_sge; + + qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof (uint64_t)); + if (!qp->sq.wrid) + return -1; + + if (qp->rq.wqe_cnt) { + qp->rq.wrid = malloc(qp->rq.wqe_cnt * sizeof (uint64_t)); + if (!qp->rq.wrid) { + free(qp->sq.wrid); + return -1; + } + } + + for (qp->rq.wqe_shift = 4; + (uint32_t)(1 << qp->rq.wqe_shift) < qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg); + qp->rq.wqe_shift++) + ; /* nothing */ + + qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + + (qp->sq.wqe_cnt << qp->sq.wqe_shift); + if (qp->rq.wqe_shift > qp->sq.wqe_shift) { + qp->rq.offset = 0; + qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift; + } else { + qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift; + qp->sq.offset = 0; + } + + if (mlx4_alloc_buf(&qp->buf, qp->buf_size, pd->context->page_size)) { + free(qp->sq.wrid); + if (qp->rq.wqe_cnt) + free(qp->rq.wrid); + return -1; + } + + memset(qp->buf.buf, 0, qp->buf_size); + + return 0; +} + +void mlx4_set_sq_sizes(struct mlx4_qp *qp, struct ibv_qp_cap *cap, + enum ibv_qp_type type) +{ + int wqe_size; + struct mlx4_context *ctx = to_mctx(qp->ibv_qp.context); + + wqe_size = (uint32_t)(1 << qp->sq.wqe_shift) - sizeof (struct mlx4_wqe_ctrl_seg); + switch (type) { + case IBV_QPT_UD: + wqe_size -= sizeof (struct mlx4_wqe_datagram_seg); + break; + + case IBV_QPT_UC: + case IBV_QPT_RC: + case IBV_QPT_XRC: + wqe_size -= sizeof (struct mlx4_wqe_raddr_seg); + break; + + default: + break; + } + + qp->sq.max_gs = wqe_size / sizeof (struct mlx4_wqe_data_seg); + cap->max_send_sge = min(ctx->max_sge, qp->sq.max_gs); + qp->sq.max_post = min(ctx->max_qp_wr, + qp->sq.wqe_cnt - qp->sq_spare_wqes); + cap->max_send_wr = qp->sq.max_post; + + /* + * Inline data segments can't cross a 64 byte boundary. So + * subtract off one segment header for each 64-byte chunk, + * taking into account the fact that wqe_size will be 32 mod + * 64 for non-UD QPs. 
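+	 * (For example, a 480-byte wqe_size with a 4-byte inline segment
+	 * header spans 8 chunks, giving 480 - 4 * 8 = 448 bytes of
+	 * max_inline_data.)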
+ */ + qp->max_inline_data = wqe_size - + (int) sizeof (struct mlx4_wqe_inline_seg) * + (align(wqe_size, MLX4_INLINE_ALIGN) / MLX4_INLINE_ALIGN); + cap->max_inline_data = qp->max_inline_data; +} + +struct mlx4_qp *mlx4_find_qp(struct mlx4_context *ctx, uint32_t qpn) +{ + int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift; + + if (ctx->qp_table[tind].refcnt) + return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask]; + else + return NULL; +} + +int mlx4_store_qp(struct mlx4_context *ctx, uint32_t qpn, struct mlx4_qp *qp) +{ + int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift; + int ret = 0; + + pthread_mutex_lock(&ctx->qp_table_mutex); + + if (!ctx->qp_table[tind].refcnt) { + ctx->qp_table[tind].table = calloc(ctx->qp_table_mask + 1, + sizeof (struct mlx4_qp *)); + if (!ctx->qp_table[tind].table) { + ret = -1; + goto out; + } + } + + ++ctx->qp_table[tind].refcnt; + ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = qp; + +out: + pthread_mutex_unlock(&ctx->qp_table_mutex); + return ret; +} + +void mlx4_clear_qp(struct mlx4_context *ctx, uint32_t qpn) +{ + int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift; + + pthread_mutex_lock(&ctx->qp_table_mutex); + + if (!--ctx->qp_table[tind].refcnt) + free(ctx->qp_table[tind].table); + else + ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = NULL; + + pthread_mutex_unlock(&ctx->qp_table_mutex); +} diff --git a/branches/ConnectX/hw/mlx4/user/srq.c b/branches/ConnectX/hw/mlx4/user/srq.c new file mode 100644 index 00000000..4e22498d --- /dev/null +++ b/branches/ConnectX/hw/mlx4/user/srq.c @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2007 Cisco, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "mlx4.h" +#include "doorbell.h" +#include "wqe.h" + +static void *get_wqe(struct mlx4_srq *srq, int n) +{ + return srq->buf.buf + (n << srq->wqe_shift); +} + +void mlx4_free_srq_wqe(struct mlx4_srq *srq, int ind) +{ + struct mlx4_wqe_srq_next_seg *next; + + pthread_spin_lock(&srq->lock); + + next = get_wqe(srq, srq->tail); + next->next_wqe_index = htons((uint16_t)ind); + srq->tail = ind; + + pthread_spin_unlock(&srq->lock); +} + +ib_api_status_t +mlx4_post_srq_recv( + IN const void* h_srq, + IN ib_recv_wr_t* const p_wr, + OUT ib_recv_wr_t** bad_wr) +{ + struct mlx4_srq *srq = to_msrq((struct ibv_srq *)h_srq); + struct mlx4_wqe_srq_next_seg *next; + struct mlx4_wqe_data_seg *scat; + ib_api_status_t status = IB_SUCCESS; + ib_recv_wr_t *wr = p_wr; + uint16_t nreq; + uint32_t i; + + pthread_spin_lock(&srq->lock); + + for (nreq = 0; wr; ++nreq, wr = wr->p_next) { + if (wr->num_ds > (uint32_t)srq->max_gs) { + status = IB_INVALID_MAX_SGE; + *bad_wr = wr; + break; + } + + if (srq->head == srq->tail) { + /* SRQ is full*/ + status = IB_INSUFFICIENT_RESOURCES; + *bad_wr = wr; + break; + } + + srq->wrid[srq->head] = wr->wr_id; + + next = get_wqe(srq, srq->head); + srq->head = ntohs(next->next_wqe_index); + scat = (struct mlx4_wqe_data_seg *) (next + 1); + + for (i = 0; i < wr->num_ds; ++i) { + scat[i].byte_count = htonl(wr->ds_array[i].length); + scat[i].lkey = htonl(wr->ds_array[i].lkey); + scat[i].addr = htonll(wr->ds_array[i].vaddr); + } + + if (i < (uint32_t)srq->max_gs) { + scat[i].byte_count = 0; + scat[i].lkey = htonl(MLX4_INVALID_LKEY); + scat[i].addr = 0; + } + } + + if (nreq) { + srq->counter = srq->counter + nreq; + + /* + * Make sure that descriptors are written before + * we write doorbell record. + */ + wmb(); + + *srq->db = htonl(srq->counter); + } + + pthread_spin_unlock(&srq->lock); + + return status; +} + +int mlx4_alloc_srq_buf(struct ibv_pd *pd, struct ibv_srq_attr *attr, + struct mlx4_srq *srq) +{ + struct mlx4_wqe_srq_next_seg *next; + int size; + int buf_size; + int i; + + UNREFERENCED_PARAMETER(attr); + + srq->wrid = malloc(srq->max * sizeof (uint64_t)); + if (!srq->wrid) + return -1; + + size = sizeof (struct mlx4_wqe_srq_next_seg) + + srq->max_gs * sizeof (struct mlx4_wqe_data_seg); + + for (srq->wqe_shift = 5; 1 << srq->wqe_shift < size; ++srq->wqe_shift) + ; /* nothing */ + + buf_size = srq->max << srq->wqe_shift; + + if (mlx4_alloc_buf(&srq->buf, buf_size, + pd->context->page_size)) { + free(srq->wrid); + return -1; + } + + // srq->buf.buf is zeroed in posix_memalign - memset(srq->buf.buf, 0, buf_size); + + /* + * Now initialize the SRQ buffer so that all of the WQEs are + * linked into the list of free WQEs. 
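+	 * (The free list is a singly linked chain: posting consumes entries
+	 * from head by following next_wqe_index, while mlx4_free_srq_wqe()
+	 * re-links completed entries at tail; head == tail means the SRQ
+	 * is full.)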
+ */ + + for (i = 0; i < srq->max; ++i) { + next = get_wqe(srq, i); + next->next_wqe_index = htons((uint16_t)((i + 1) & (srq->max - 1))); + } + + srq->head = 0; + srq->tail = srq->max - 1; + + return 0; +} + +struct mlx4_srq *mlx4_find_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn) +{ + int tind = (xrc_srqn & (ctx->num_xrc_srqs - 1)) >> ctx->xrc_srq_table_shift; + + if (ctx->xrc_srq_table[tind].refcnt) + return ctx->xrc_srq_table[tind].table[xrc_srqn & ctx->xrc_srq_table_mask]; + else + return NULL; +} + +int mlx4_store_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn, + struct mlx4_srq *srq) +{ + int tind = (xrc_srqn & (ctx->num_xrc_srqs - 1)) >> ctx->xrc_srq_table_shift; + int ret = 0; + + pthread_mutex_lock(&ctx->xrc_srq_table_mutex); + + if (!ctx->xrc_srq_table[tind].refcnt) { + ctx->xrc_srq_table[tind].table = calloc(ctx->xrc_srq_table_mask + 1, + sizeof (struct mlx4_srq *)); + if (!ctx->xrc_srq_table[tind].table) { + ret = -1; + goto out; + } + } + + ++ctx->xrc_srq_table[tind].refcnt; + ctx->xrc_srq_table[tind].table[xrc_srqn & ctx->xrc_srq_table_mask] = srq; + +out: + pthread_mutex_unlock(&ctx->xrc_srq_table_mutex); + return ret; +} + +void mlx4_clear_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn) +{ + int tind = (xrc_srqn & (ctx->num_xrc_srqs - 1)) >> ctx->xrc_srq_table_shift; + + pthread_mutex_lock(&ctx->xrc_srq_table_mutex); + + if (!--ctx->xrc_srq_table[tind].refcnt) + free(ctx->xrc_srq_table[tind].table); + else + ctx->xrc_srq_table[tind].table[xrc_srqn & ctx->xrc_srq_table_mask] = NULL; + + pthread_mutex_unlock(&ctx->xrc_srq_table_mutex); +} + diff --git a/branches/ConnectX/hw/mlx4/user/verbs.c b/branches/ConnectX/hw/mlx4/user/verbs.c new file mode 100644 index 00000000..a0bf5dcb --- /dev/null +++ b/branches/ConnectX/hw/mlx4/user/verbs.c @@ -0,0 +1,1539 @@ +/* + * Copyright (c) 2007 Cisco, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "mlx4.h" +#include "verbs.h" +#include "mx_abi.h" +#include "wqe.h" +#include "mlx4_debug.h" + +#if defined(EVENT_TRACING) +#include "verbs.tmh" +#endif + +ib_api_status_t +mlx4_pre_open_ca ( + IN const ib_net64_t ca_guid, + IN OUT ci_umv_buf_t *p_umv_buf, + OUT ib_ca_handle_t *ph_uvp_ca ) +{ + struct ibv_context *context; + ib_api_status_t status = IB_SUCCESS; + + UNREFERENCED_PARAMETER(ca_guid); + + context = mlx4_alloc_context(); + if (!context) { + status = IB_INSUFFICIENT_MEMORY; + goto end; + } + + if( p_umv_buf ) + { + if( !p_umv_buf->p_inout_buf ) + { + p_umv_buf->p_inout_buf = cl_zalloc( sizeof(struct ibv_get_context_resp) ); + if( !p_umv_buf->p_inout_buf ) + { + status = IB_INSUFFICIENT_MEMORY; + goto end; + } + } + p_umv_buf->input_size = 0; + p_umv_buf->output_size = sizeof(struct ibv_get_context_resp); + p_umv_buf->command = TRUE; + } + + *ph_uvp_ca = (ib_ca_handle_t)context; + +end: + return status; +} + +ib_api_status_t +mlx4_post_open_ca ( + IN const ib_net64_t ca_guid, + IN ib_api_status_t ioctl_status, + IN OUT ib_ca_handle_t *ph_uvp_ca, + IN ci_umv_buf_t *p_umv_buf ) +{ + struct ibv_get_context_resp *p_resp; + struct ibv_context *context = (struct ibv_context *)*ph_uvp_ca; + ib_api_status_t status = IB_SUCCESS; + + UNREFERENCED_PARAMETER(ca_guid); + + CL_ASSERT(p_umv_buf && p_umv_buf->p_inout_buf); + + p_resp = p_umv_buf->p_inout_buf; + + if (IB_SUCCESS == ioctl_status) + { + if (!mlx4_fill_context(context, p_resp)) + { + status = IB_INSUFFICIENT_RESOURCES; + goto end; + } + } + +end: + cl_free(p_resp); + return status; +} + +ib_api_status_t +mlx4_pre_query_ca ( + IN ib_ca_handle_t h_uvp_ca, + IN ib_ca_attr_t *p_ca_attr, + IN size_t byte_count, + IN ci_umv_buf_t *p_umv_buf ) +{ + ib_api_status_t status = IB_SUCCESS; + + UNREFERENCED_PARAMETER(h_uvp_ca); + + /* Note that query_ca calls *always* get their attributes from the kernel. + * + * Assume if user buffer is valid then byte_cnt is valid too + * so we can preallocate ca attr buffer for post ioctl data saving + * + * Note that we squirrel the buffer away into the umv_buf and only + * set it into the HCA if the query is successful. 
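+	 * The cached copy is what the AV path later consults, e.g. the GID
+	 * index lookup done by mlx4_pre_create_ah().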
+ */ + if ( p_ca_attr != NULL ) + { + p_umv_buf->p_inout_buf = cl_malloc(byte_count); + if ( !p_umv_buf->p_inout_buf ) + { + status = IB_INSUFFICIENT_RESOURCES; + goto end; + } + } + +end: + return status; +} + +void +mlx4_post_query_ca ( + IN ib_ca_handle_t h_uvp_ca, + IN ib_api_status_t ioctl_status, + IN ib_ca_attr_t *p_ca_attr, + IN size_t byte_count, + IN ci_umv_buf_t *p_umv_buf ) +{ + struct ibv_context *context = (struct ibv_context *)h_uvp_ca; + + CL_ASSERT(context && p_umv_buf); + + if ( ioctl_status == IB_SUCCESS && p_ca_attr && byte_count) + { + CL_ASSERT( byte_count >= p_ca_attr->size ); + + pthread_mutex_lock(&context->mutex); + + if (context->p_hca_attr) + cl_free(context->p_hca_attr); + context->p_hca_attr = p_umv_buf->p_inout_buf; + ib_copy_ca_attr(context->p_hca_attr, p_ca_attr); + + pthread_mutex_unlock(&context->mutex); + } + else if (p_umv_buf->p_inout_buf) + { + cl_free(p_umv_buf->p_inout_buf); + } +} + +ib_api_status_t +mlx4_post_close_ca ( + IN ib_ca_handle_t h_uvp_ca, + IN ib_api_status_t ioctl_status ) +{ + struct ibv_context *context = (struct ibv_context *)h_uvp_ca; + + CL_ASSERT(context); + + if (IB_SUCCESS == ioctl_status) + mlx4_free_context(context); + + return IB_SUCCESS; +} + +ib_api_status_t +mlx4_pre_alloc_pd ( + IN const ib_ca_handle_t h_uvp_ca, + IN OUT ci_umv_buf_t *p_umv_buf, + OUT ib_pd_handle_t *ph_uvp_pd ) +{ + struct mlx4_pd *pd; + struct ibv_context *context = (struct ibv_context *)h_uvp_ca; + ib_api_status_t status = IB_SUCCESS; + + CL_ASSERT(context && p_umv_buf); + + if( !p_umv_buf->p_inout_buf ) + { + p_umv_buf->p_inout_buf = cl_malloc( sizeof(struct ibv_alloc_pd_resp) ); + if( !p_umv_buf->p_inout_buf ) + { + status = IB_INSUFFICIENT_MEMORY; + goto end; + } + } + p_umv_buf->input_size = 0; + p_umv_buf->output_size = sizeof(struct ibv_alloc_pd_resp); + p_umv_buf->command = TRUE; + + // Mlx4 code: + + pd = cl_malloc(sizeof *pd); + if (!pd) { + status = IB_INSUFFICIENT_MEMORY; + goto end; + } + + pd->ibv_pd.context = context; + + *ph_uvp_pd = (ib_pd_handle_t)&pd->ibv_pd; + +end: + return status; +} + +void +mlx4_post_alloc_pd ( + IN ib_ca_handle_t h_uvp_ca, + IN ib_api_status_t ioctl_status, + IN OUT ib_pd_handle_t *ph_uvp_pd, + IN ci_umv_buf_t *p_umv_buf ) +{ + struct ibv_pd *pd = (struct ibv_pd *)*ph_uvp_pd; + struct ibv_alloc_pd_resp *p_resp; + + + UNREFERENCED_PARAMETER(h_uvp_ca); + + CL_ASSERT(p_umv_buf && p_umv_buf->p_inout_buf); + + p_resp = p_umv_buf->p_inout_buf; + + if (IB_SUCCESS == ioctl_status) + { + // Mlx4 code: + + pd->handle = p_resp->pd_handle; + to_mpd(pd)->pdn = p_resp->pdn; + } + else + { + cl_free(to_mpd(pd)); + } + + cl_free(p_resp); + return; +} + +void +mlx4_post_free_pd ( + IN const ib_pd_handle_t h_uvp_pd, + IN ib_api_status_t ioctl_status ) +{ + struct ibv_pd *pd = (struct ibv_pd *)h_uvp_pd; + + CL_ASSERT(pd); + + if (IB_SUCCESS == ioctl_status) + cl_free(to_mpd(pd)); +} + +static int __align_queue_size(int req) +{ + int nent; + + for (nent = 1; nent < req; nent <<= 1) + ; /* nothing */ + + return nent; +} + +ib_api_status_t +mlx4_pre_create_cq ( + IN const ib_ca_handle_t h_uvp_ca, + IN OUT uint32_t* const p_size, + IN OUT ci_umv_buf_t *p_umv_buf, + OUT ib_cq_handle_t *ph_uvp_cq ) +{ + struct mlx4_cq *cq; + struct ibv_create_cq *p_create_cq; + struct ibv_context *context = (struct ibv_context *)h_uvp_ca; + ib_api_status_t status = IB_SUCCESS; + int size = max( sizeof(struct ibv_create_cq), sizeof(struct ibv_create_cq_resp) ); + + CL_ASSERT(h_uvp_ca && p_umv_buf); + + if( !p_umv_buf->p_inout_buf ) + { + 
p_umv_buf->p_inout_buf = cl_malloc( size ); + if( !p_umv_buf->p_inout_buf ) + { + status = IB_INSUFFICIENT_MEMORY; + goto err_umv_buf; + } + } + p_umv_buf->input_size = sizeof(struct ibv_create_cq); + p_umv_buf->output_size = sizeof(struct ibv_create_cq_resp); + p_umv_buf->command = TRUE; + + p_create_cq = p_umv_buf->p_inout_buf; + + // Mlx4 code: + + /* Sanity check CQ size before proceeding */ + if (*p_size > 0x3fffff) { + status = IB_INVALID_PARAMETER; + goto err_cqe_size; + } + + cq = cl_malloc(sizeof *cq); + if (!cq) { + status = IB_INSUFFICIENT_MEMORY; + goto err_cq; + } + + if (cl_spinlock_init(&cq->lock)) { + status = IB_INSUFFICIENT_MEMORY; + goto err_lock; + } + + *p_size = __align_queue_size(*p_size + 1); + + if (mlx4_alloc_buf(&cq->buf, *p_size * MLX4_CQ_ENTRY_SIZE, + context->page_size)) + goto err_alloc_buf; + + // cq->buf.buf is zeroed in posix_memalign - memset(cq->buf.buf, 0, buf_size); + + cq->ibv_cq.context = context; + cq->cons_index = 0; + + cq->set_ci_db = mlx4_alloc_db(to_mctx(context), MLX4_DB_TYPE_CQ); + if (!cq->set_ci_db) + goto err_alloc_db; + + cq->arm_db = cq->set_ci_db + 1; + *cq->arm_db = 0; + cq->arm_sn = 1; + *cq->set_ci_db = 0; + + p_create_cq->buf_addr = (uintptr_t) cq->buf.buf; + p_create_cq->db_addr = (uintptr_t) cq->set_ci_db; + p_create_cq->arm_sn_addr = (uintptr_t) &cq->arm_sn; + p_create_cq->cqe = --(*p_size); + + *ph_uvp_cq = (ib_cq_handle_t)&cq->ibv_cq; + goto end; + +err_alloc_db: + mlx4_free_buf(&cq->buf); +err_alloc_buf: + cl_spinlock_destroy(&cq->lock); +err_lock: + cl_free(cq); +err_cq: +err_cqe_size: + cl_free(p_umv_buf->p_inout_buf); +err_umv_buf: +end: + return status; +} + +void +mlx4_post_create_cq ( + IN const ib_ca_handle_t h_uvp_ca, + IN ib_api_status_t ioctl_status, + IN const uint32_t size, + IN OUT ib_cq_handle_t *ph_uvp_cq, + IN ci_umv_buf_t *p_umv_buf ) +{ + struct ibv_cq *cq = (struct ibv_cq *)*ph_uvp_cq; + struct ibv_create_cq_resp *p_resp; + + UNREFERENCED_PARAMETER(h_uvp_ca); + UNREFERENCED_PARAMETER(size); + + CL_ASSERT(p_umv_buf && p_umv_buf->p_inout_buf); + + p_resp = p_umv_buf->p_inout_buf; + + if (IB_SUCCESS == ioctl_status) + { + // Mlx4 code: + + to_mcq(cq)->cqn = p_resp->cqn; + cq->cqe = p_resp->cqe; + cq->handle = p_resp->cq_handle; + } + else + { + mlx4_post_destroy_cq (*ph_uvp_cq, IB_SUCCESS); + } + + cl_free(p_resp); + return; +} + +ib_api_status_t +mlx4_pre_query_cq ( + IN const ib_cq_handle_t h_uvp_cq, + OUT uint32_t* const p_size, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + struct ibv_cq *cq = (struct ibv_cq *)h_uvp_cq; + + UNREFERENCED_PARAMETER(p_umv_buf); + + *p_size = cq->cqe; + + return IB_VERBS_PROCESSING_DONE; +} + +void +mlx4_post_destroy_cq ( + IN const ib_cq_handle_t h_uvp_cq, + IN ib_api_status_t ioctl_status ) +{ + struct ibv_cq *cq = (struct ibv_cq *)h_uvp_cq; + + CL_ASSERT(cq); + + if (IB_SUCCESS == ioctl_status) { + mlx4_free_db(to_mctx(cq->context), MLX4_DB_TYPE_CQ, to_mcq(cq)->set_ci_db); + mlx4_free_buf(&to_mcq(cq)->buf); + + cl_spinlock_destroy(&to_mcq(cq)->lock); + cl_free(to_mcq(cq)); + } +} + +ib_api_status_t +mlx4_pre_create_srq ( + IN const ib_pd_handle_t h_uvp_pd, + IN const ib_srq_attr_t *p_srq_attr, + IN OUT ci_umv_buf_t *p_umv_buf, + OUT ib_srq_handle_t *ph_uvp_srq ) +{ + struct mlx4_srq *srq; + struct ibv_create_srq *p_create_srq; + struct ibv_pd *pd = (struct ibv_pd *)h_uvp_pd; + ib_api_status_t status = IB_SUCCESS; + size_t size = max( sizeof(struct ibv_create_srq), sizeof(struct ibv_create_srq_resp) ); + + CL_ASSERT(p_umv_buf); + + if( !p_umv_buf->p_inout_buf ) + { + 
p_umv_buf->p_inout_buf = cl_malloc( size ); + if( !p_umv_buf->p_inout_buf ) + { + status = IB_INSUFFICIENT_MEMORY; + goto err_memory; + } + } + p_umv_buf->input_size = sizeof(struct ibv_create_srq); + p_umv_buf->output_size = sizeof(struct ibv_create_srq_resp); + p_umv_buf->command = TRUE; + + p_create_srq = p_umv_buf->p_inout_buf; + + // Mlx4 code: + + /* Sanity check SRQ size before proceeding */ + if (p_srq_attr->max_wr > 1 << 16 || p_srq_attr->max_sge > 64) + { + status = IB_INVALID_PARAMETER; + goto err_params; + } + + srq = cl_malloc(sizeof *srq); + if (!srq) { + status = IB_INSUFFICIENT_MEMORY; + goto err_alloc_srq; + } + + if (cl_spinlock_init(&srq->lock)) { + status = IB_INSUFFICIENT_MEMORY; + goto err_lock; + } + + srq->ibv_srq.pd = pd; + srq->ibv_srq.context = pd->context; + + srq->max = __align_queue_size(p_srq_attr->max_wr + 1); + srq->max_gs = p_srq_attr->max_sge; + srq->counter = 0; + + if (mlx4_alloc_srq_buf(pd, (struct ibv_srq_attr *)p_srq_attr, srq)) + { + status = IB_INSUFFICIENT_MEMORY; + goto err_alloc_buf; + } + + srq->db = mlx4_alloc_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ); + if (!srq->db) + goto err_alloc_db; + + *srq->db = 0; + + // fill the parameters for ioctl + p_create_srq->buf_addr = (uintptr_t) srq->buf.buf; + p_create_srq->db_addr = (uintptr_t) srq->db; + p_create_srq->pd_handle = pd->handle; + p_create_srq->max_wr = p_srq_attr->max_wr; + p_create_srq->max_sge = p_srq_attr->max_sge; + p_create_srq->srq_limit = p_srq_attr->srq_limit; + + *ph_uvp_srq = (ib_srq_handle_t)&srq->ibv_srq; + goto end; + +err_alloc_db: + cl_free(srq->wrid); + mlx4_free_buf(&srq->buf); +err_alloc_buf: + cl_spinlock_destroy(&srq->lock); +err_lock: + cl_free(srq); +err_alloc_srq: + cl_free(p_umv_buf->p_inout_buf); +err_params: err_memory: +end: + return status; +} + +void +mlx4_post_create_srq ( + IN const ib_pd_handle_t h_uvp_pd, + IN ib_api_status_t ioctl_status, + IN OUT ib_srq_handle_t *ph_uvp_srq, + IN ci_umv_buf_t *p_umv_buf ) +{ + struct ibv_srq *ibsrq = (struct ibv_srq *)*ph_uvp_srq; + struct mlx4_srq *srq = to_msrq(ibsrq); + struct ibv_create_srq_resp *p_resp; + + UNREFERENCED_PARAMETER(h_uvp_pd); + + CL_ASSERT(p_umv_buf && p_umv_buf->p_inout_buf); + + p_resp = p_umv_buf->p_inout_buf; + + if (IB_SUCCESS == ioctl_status) + { + // Mlx4 code: + + srq->srqn = p_resp->srqn; + ibsrq->handle = p_resp->srq_handle; + + srq->max = p_resp->max_wr; + srq->max_gs = p_resp->max_sge; + } + else + { + mlx4_post_destroy_srq (*ph_uvp_srq, IB_SUCCESS); + } + + cl_free(p_resp); + return; +} + +ib_api_status_t +mlx4_pre_destroy_srq ( + IN const ib_srq_handle_t h_uvp_srq ) +{ + struct ibv_srq *ibsrq = (struct ibv_srq *)h_uvp_srq; + struct mlx4_srq *srq = to_msrq(ibsrq); + struct mlx4_cq *mcq = NULL; + + if (ibsrq->xrc_cq) + { + /* is an xrc_srq */ + mcq = to_mcq(ibsrq->xrc_cq); + mlx4_cq_clean(mcq, 0, srq); + cl_spinlock_acquire(&mcq->lock); + mlx4_clear_xrc_srq(to_mctx(ibsrq->context), srq->srqn); + cl_spinlock_release(&mcq->lock); + } + return IB_SUCCESS; +} + +void +mlx4_post_destroy_srq ( + IN const ib_srq_handle_t h_uvp_srq, + IN ib_api_status_t ioctl_status ) +{ + struct ibv_srq *ibsrq = (struct ibv_srq *)h_uvp_srq; + struct mlx4_srq *srq = to_msrq(ibsrq); + struct mlx4_cq *mcq = NULL; + + CL_ASSERT(srq); + + if (IB_SUCCESS == ioctl_status) + { + mlx4_free_db(to_mctx(ibsrq->context), MLX4_DB_TYPE_RQ, srq->db); + cl_free(srq->wrid); + mlx4_free_buf(&srq->buf); + cl_spinlock_destroy(&srq->lock); + cl_free(srq); + } + else + { + if (ibsrq->xrc_cq) { + cl_spinlock_acquire(&mcq->lock); + 
mlx4_store_xrc_srq(to_mctx(ibsrq->context), srq->srqn, srq); + cl_spinlock_release(&mcq->lock); + } + } +} + +static enum ibv_qp_type +__to_qp_type(ib_qp_type_t type) +{ + switch (type) { + case IB_QPT_RELIABLE_CONN: return IBV_QPT_RC; + case IB_QPT_UNRELIABLE_CONN: return IBV_QPT_UC; + case IB_QPT_UNRELIABLE_DGRM: return IBV_QPT_UD; + //case IB_QPT_XRC_CONN: return IBV_QPT_XRC; + default: return IBV_QPT_RC; + } +} + +ib_api_status_t +mlx4_pre_create_qp ( + IN const ib_pd_handle_t h_uvp_pd, + IN const ib_qp_create_t *p_create_attr, + IN OUT ci_umv_buf_t *p_umv_buf, + OUT ib_qp_handle_t *ph_uvp_qp ) +{ + struct ibv_pd *pd = (struct ibv_pd *)h_uvp_pd; + struct mlx4_context *context = to_mctx(pd->context); + struct mlx4_qp *qp; + struct ibv_create_qp *p_create_qp; + struct ibv_qp_init_attr attr; + ib_api_status_t status = IB_SUCCESS; + int size = max( sizeof(struct ibv_create_qp), sizeof(struct ibv_create_qp_resp) ); + + CL_ASSERT(p_umv_buf); + + if( !p_umv_buf->p_inout_buf ) + { + p_umv_buf->p_inout_buf = cl_malloc(size); + if( !p_umv_buf->p_inout_buf ) + { + status = IB_INSUFFICIENT_MEMORY; + goto err_memory; + } + } + p_umv_buf->input_size = sizeof(struct ibv_create_qp); + p_umv_buf->output_size = sizeof(struct ibv_create_qp_resp); + p_umv_buf->command = TRUE; + + p_create_qp = p_umv_buf->p_inout_buf; + + /* convert attributes */ + attr.send_cq = (struct ibv_cq *)p_create_attr->h_sq_cq; + attr.recv_cq = (struct ibv_cq *)p_create_attr->h_rq_cq; + attr.srq = (struct ibv_srq*)p_create_attr->h_srq; + attr.cap.max_send_wr = p_create_attr->sq_depth; + attr.cap.max_recv_wr = p_create_attr->rq_depth; + attr.cap.max_send_sge = p_create_attr->sq_sge; + attr.cap.max_recv_sge = p_create_attr->rq_sge; + attr.cap.max_inline_data = 0; /* absent in IBAL */ + attr.qp_type = __to_qp_type(p_create_attr->qp_type); + attr.sq_sig_all = p_create_attr->sq_signaled; + + // Mlx4 code: + + /* Sanity check QP size before proceeding */ + if (attr.cap.max_send_wr > (uint32_t) context->max_qp_wr || + attr.cap.max_recv_wr > (uint32_t) context->max_qp_wr || + attr.cap.max_send_sge > (uint32_t) context->max_sge || + attr.cap.max_recv_sge > (uint32_t) context->max_sge || + attr.cap.max_inline_data > 1024) + { + status = IB_INVALID_PARAMETER; + goto end; + } + + qp = cl_malloc(sizeof *qp); + if (!qp) { + status = IB_INSUFFICIENT_MEMORY; + goto err_alloc_qp; + } + + mlx4_calc_sq_wqe_size(&attr.cap, attr.qp_type, qp); + + /* + * We need to leave 2 KB + 1 WQE of headroom in the SQ to + * allow HW to prefetch. 
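+	 * (2048 >> wqe_shift converts the 2 KB of headroom into a WQE count
+	 * for the chosen stride, e.g. 32 spare WQEs for a 64-byte stride,
+	 * plus the one extra WQE.)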
+ */ + qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1; + qp->sq.wqe_cnt = __align_queue_size(attr.cap.max_send_wr + qp->sq_spare_wqes); + qp->rq.wqe_cnt = __align_queue_size(attr.cap.max_recv_wr); + + if (attr.srq || attr.qp_type == IBV_QPT_XRC) + attr.cap.max_recv_wr = qp->rq.wqe_cnt = 0; + else { + if (attr.cap.max_recv_sge < 1) + attr.cap.max_recv_sge = 1; + if (attr.cap.max_recv_wr < 1) + attr.cap.max_recv_wr = 1; + } + + if (mlx4_alloc_qp_buf(pd, &attr.cap, attr.qp_type, qp)) + goto err_alloc_qp_buff; + + mlx4_init_qp_indices(qp); + + if (cl_spinlock_init(&qp->sq.lock)) { + status = IB_INSUFFICIENT_MEMORY; + goto err_spinlock_sq; + } + if (cl_spinlock_init(&qp->rq.lock)) { + status = IB_INSUFFICIENT_MEMORY; + goto err_spinlock_rq; + } + + // fill qp fields + if (!attr.srq && attr.qp_type != IBV_QPT_XRC) { + qp->db = mlx4_alloc_db(context, MLX4_DB_TYPE_RQ); + if (!qp->db) { + status = IB_INSUFFICIENT_MEMORY; + goto err_db; + } + + *qp->db = 0; + } + if (attr.sq_sig_all) + qp->sq_signal_bits = cl_hton32(MLX4_WQE_CTRL_CQ_UPDATE); + else + qp->sq_signal_bits = 0; + + // fill the rest of qp fields + qp->ibv_qp.pd = pd; + qp->ibv_qp.context= pd->context; + qp->ibv_qp.send_cq = attr.send_cq; + qp->ibv_qp.recv_cq = attr.recv_cq; + qp->ibv_qp.srq = attr.srq; + qp->ibv_qp.state = IBV_QPS_RESET; + qp->ibv_qp.qp_type = attr.qp_type; + + // fill request fields + p_create_qp->buf_addr = (uintptr_t) qp->buf.buf; + if (!attr.srq && attr.qp_type != IBV_QPT_XRC) + p_create_qp->db_addr = (uintptr_t) qp->db; + else + p_create_qp->db_addr = 0; + + p_create_qp->pd_handle = pd->handle; + p_create_qp->send_cq_handle = attr.send_cq->handle; + p_create_qp->recv_cq_handle = attr.recv_cq->handle; + p_create_qp->srq_handle = attr.qp_type == IBV_QPT_XRC ? + (attr.xrc_domain ? attr.xrc_domain->handle : 0) : + (attr.srq ? attr.srq->handle : 0); + + p_create_qp->max_send_wr = attr.cap.max_send_wr; + p_create_qp->max_recv_wr = attr.cap.max_recv_wr; + p_create_qp->max_send_sge = attr.cap.max_send_sge; + p_create_qp->max_recv_sge = attr.cap.max_recv_sge; + p_create_qp->max_inline_data = attr.cap.max_inline_data; + p_create_qp->sq_sig_all = (uint8_t)attr.sq_sig_all; + p_create_qp->qp_type = attr.qp_type; + p_create_qp->is_srq = (uint8_t)(attr.qp_type == IBV_QPT_XRC ? 
+ !!attr.xrc_domain : !!attr.srq); + + p_create_qp->log_sq_stride = (uint8_t)qp->sq.wqe_shift; + for (p_create_qp->log_sq_bb_count = 0; + qp->sq.wqe_cnt > 1 << p_create_qp->log_sq_bb_count; + ++p_create_qp->log_sq_bb_count) + ; /* nothing */ + p_create_qp->sq_no_prefetch = 0; + + *ph_uvp_qp = (ib_qp_handle_t)&qp->ibv_qp; + goto end; + +err_db: + cl_spinlock_destroy(&qp->rq.lock); +err_spinlock_rq: + cl_spinlock_destroy(&qp->sq.lock); +err_spinlock_sq: + cl_free(qp->sq.wrid); + if (qp->rq.wqe_cnt) + free(qp->rq.wrid); + mlx4_free_buf(&qp->buf); +err_alloc_qp_buff: + cl_free(qp); +err_alloc_qp: + cl_free(p_umv_buf->p_inout_buf); +err_memory: +end: + return status; +} + +ib_api_status_t +mlx4_post_create_qp ( + IN const ib_pd_handle_t h_uvp_pd, + IN ib_api_status_t ioctl_status, + IN OUT ib_qp_handle_t *ph_uvp_qp, + IN ci_umv_buf_t *p_umv_buf ) +{ + struct mlx4_qp *qp = (struct mlx4_qp *)*ph_uvp_qp; + struct ibv_pd *pd = (struct ibv_pd *)h_uvp_pd; + struct ibv_context *context = pd->context; + struct ibv_create_qp_resp *p_resp; + ib_api_status_t status = IB_SUCCESS; + + CL_ASSERT(p_umv_buf && p_umv_buf->p_inout_buf); + + p_resp = p_umv_buf->p_inout_buf; + + if (IB_SUCCESS == ioctl_status) + { + // Mlx4 code: + + struct ibv_qp_cap cap; + + cap.max_recv_sge = p_resp->max_recv_sge; + cap.max_send_sge = p_resp->max_send_sge; + cap.max_recv_wr = p_resp->max_recv_wr; + cap.max_send_wr = p_resp->max_send_wr; + cap.max_inline_data = p_resp->max_inline_data; + + qp->ibv_qp.handle = p_resp->qp_handle; + qp->ibv_qp.qp_num = p_resp->qpn; + + qp->rq.wqe_cnt = cap.max_recv_wr; + qp->rq.max_gs = cap.max_recv_sge; + + /* adjust rq maxima to not exceed reported device maxima */ + cap.max_recv_wr = min((uint32_t) to_mctx(context)->max_qp_wr, cap.max_recv_wr); + cap.max_recv_sge = min((uint32_t) to_mctx(context)->max_sge, cap.max_recv_sge); + + qp->rq.max_post = cap.max_recv_wr; + //qp->rq.max_gs = cap.max_recv_sge; - RIB : add this ? 
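+
+		// The SQ maxima (max_gs, max_post, max_inline_data) set below are
+		// derived from the WQE stride chosen in mlx4_calc_sq_wqe_size()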
+ mlx4_set_sq_sizes(qp, &cap, qp->ibv_qp.qp_type); + + qp->doorbell_qpn = cl_hton32(qp->ibv_qp.qp_num << 8); + + if (mlx4_store_qp(to_mctx(context), qp->ibv_qp.qp_num, qp)) + { + mlx4_post_destroy_qp(*ph_uvp_qp, IB_SUCCESS); + status = IB_INSUFFICIENT_MEMORY; + } + MLX4_PRINT( TRACE_LEVEL_INFORMATION, MLX4_DBG_QP, + ("qpn %#x, buf %p, db_rec %p, sq %d:%d, rq %d:%d\n", + qp->ibv_qp.qp_num, qp->buf.buf, qp->db, + qp->sq.head, qp->sq.tail, qp->rq.head, qp->rq.tail )); + } + else + { + mlx4_post_destroy_qp(*ph_uvp_qp, IB_SUCCESS); + } + + cl_free(p_resp); + return status; +} + +ib_api_status_t +mlx4_pre_modify_qp ( + IN const ib_qp_handle_t h_uvp_qp, + IN const ib_qp_mod_t *p_modify_attr, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + ib_api_status_t status = IB_SUCCESS; + + UNREFERENCED_PARAMETER(h_uvp_qp); + UNREFERENCED_PARAMETER(p_modify_attr); + + CL_ASSERT(p_umv_buf); + + if( !p_umv_buf->p_inout_buf ) + { + p_umv_buf->p_inout_buf = cl_malloc(sizeof(struct ibv_modify_qp_resp)); + if( !p_umv_buf->p_inout_buf ) + { + status = IB_INSUFFICIENT_MEMORY; + goto err_memory; + } + } + p_umv_buf->input_size = 0; + p_umv_buf->output_size = sizeof(struct ibv_modify_qp_resp); + p_umv_buf->command = TRUE; + +err_memory: + return status; +} + +void +mlx4_post_query_qp ( + IN ib_qp_handle_t h_uvp_qp, + IN ib_api_status_t ioctl_status, + IN OUT ib_qp_attr_t *p_query_attr, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + struct mlx4_qp *qp = (struct mlx4_qp *)h_uvp_qp; + + UNREFERENCED_PARAMETER(p_umv_buf); + + if(IB_SUCCESS == ioctl_status) + { + p_query_attr->sq_max_inline = qp->max_inline_data; + p_query_attr->sq_sge = qp->sq.max_gs; + p_query_attr->sq_depth = qp->sq.max_post; + p_query_attr->rq_sge = qp->rq.max_gs; + p_query_attr->rq_depth = qp->rq.max_post; + } +} + +void +mlx4_post_modify_qp ( + IN const ib_qp_handle_t h_uvp_qp, + IN ib_api_status_t ioctl_status, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + struct ibv_qp *qp = (struct ibv_qp *)h_uvp_qp; + struct ibv_modify_qp_resp *p_resp; + + CL_ASSERT(p_umv_buf && p_umv_buf->p_inout_buf); + + p_resp = p_umv_buf->p_inout_buf; + + if (IB_SUCCESS == ioctl_status) + { + // Mlx4 code: + + if (qp->state == IBV_QPS_RESET && + p_resp->attr_mask & IBV_QP_STATE && + p_resp->qp_state == IBV_QPS_INIT) + { + mlx4_qp_init_sq_ownership(to_mqp(qp)); + } + + if (p_resp->attr_mask & IBV_QP_STATE) { + qp->state = p_resp->qp_state; + } + + if (p_resp->attr_mask & IBV_QP_STATE && + p_resp->qp_state == IBV_QPS_RESET) + { + mlx4_cq_clean(to_mcq(qp->recv_cq), qp->qp_num, + qp->srq ? 
to_msrq(qp->srq) : NULL); + if (qp->send_cq != qp->recv_cq) + mlx4_cq_clean(to_mcq(qp->send_cq), qp->qp_num, NULL); + + mlx4_init_qp_indices(to_mqp(qp)); + if (!qp->srq && qp->qp_type != IBV_QPT_XRC) + *to_mqp(qp)->db = 0; + } + } + + cl_free (p_resp); + return; +} + +static void +__mlx4_lock_cqs(struct ibv_qp *qp) +{ + struct mlx4_cq *send_cq = to_mcq(qp->send_cq); + struct mlx4_cq *recv_cq = to_mcq(qp->recv_cq); + + if (send_cq == recv_cq) + cl_spinlock_acquire(&send_cq->lock); + else if (send_cq->cqn < recv_cq->cqn) { + cl_spinlock_acquire(&send_cq->lock); + cl_spinlock_acquire(&recv_cq->lock); + } else { + cl_spinlock_acquire(&recv_cq->lock); + cl_spinlock_acquire(&send_cq->lock); + } +} + +static void +__mlx4_unlock_cqs(struct ibv_qp *qp) +{ + struct mlx4_cq *send_cq = to_mcq(qp->send_cq); + struct mlx4_cq *recv_cq = to_mcq(qp->recv_cq); + + if (send_cq == recv_cq) + cl_spinlock_release(&send_cq->lock); + else if (send_cq->cqn < recv_cq->cqn) { + cl_spinlock_release(&recv_cq->lock); + cl_spinlock_release(&send_cq->lock); + } else { + cl_spinlock_release(&send_cq->lock); + cl_spinlock_release(&recv_cq->lock); + } +} + +ib_api_status_t +mlx4_pre_destroy_qp ( + IN const ib_qp_handle_t h_uvp_qp ) +{ + struct ibv_qp *qp = (struct ibv_qp*)h_uvp_qp; + + mlx4_cq_clean(to_mcq(qp->recv_cq), qp->qp_num, + qp->srq ? to_msrq(qp->srq) : NULL); + if (qp->send_cq != qp->recv_cq) + mlx4_cq_clean(to_mcq(qp->send_cq), qp->qp_num, NULL); + + __mlx4_lock_cqs(qp); + mlx4_clear_qp(to_mctx(qp->context), qp->qp_num); + __mlx4_unlock_cqs(qp); + + return IB_SUCCESS; +} + +void +mlx4_post_destroy_qp ( + IN const ib_qp_handle_t h_uvp_qp, + IN ib_api_status_t ioctl_status ) +{ + struct ibv_qp* ibqp = (struct ibv_qp *)h_uvp_qp; + struct mlx4_qp* qp = to_mqp(ibqp); + + CL_ASSERT(h_uvp_qp); + + if (IB_SUCCESS == ioctl_status) + { + if (!ibqp->srq && ibqp->qp_type != IBV_QPT_XRC) + mlx4_free_db(to_mctx(ibqp->context), MLX4_DB_TYPE_RQ, qp->db); + + cl_spinlock_destroy(&qp->sq.lock); + cl_spinlock_destroy(&qp->rq.lock); + + MLX4_PRINT( TRACE_LEVEL_INFORMATION, MLX4_DBG_QP, + ("qpn %#x, buf %p, sq %d:%d, rq %d:%d\n", qp->ibv_qp.qp_num, qp->buf.buf, + qp->sq.head, qp->sq.tail, qp->rq.head, qp->rq.tail )); + cl_free(qp->sq.wrid); + if (qp->rq.wqe_cnt) + cl_free(qp->rq.wrid); + mlx4_free_buf(&qp->buf); + cl_free(qp); + } + else + { + __mlx4_lock_cqs(ibqp); + mlx4_store_qp(to_mctx(ibqp->context), ibqp->qp_num, qp); + __mlx4_unlock_cqs(ibqp); + } +} + +void +mlx4_nd_modify_qp ( + IN const ib_qp_handle_t h_uvp_qp, + OUT void** pp_outbuf, + OUT DWORD* p_size ) +{ + struct ibv_qp *ibv_qp = (struct ibv_qp *)h_uvp_qp; + + *(uint32_t**)pp_outbuf = (uint32_t*)&ibv_qp->state; + *p_size = sizeof(ibv_qp->state); +} + +static ib_qp_state_t __from_qp_state(enum ibv_qp_state state) +{ + switch (state) { + case IBV_QPS_RESET: return IB_QPS_RESET; + case IBV_QPS_INIT: return IB_QPS_INIT; + case IBV_QPS_RTR: return IB_QPS_RTR; + case IBV_QPS_RTS: return IB_QPS_RTS; + case IBV_QPS_SQD: return IB_QPS_SQD; + case IBV_QPS_SQE: return IB_QPS_SQERR; + case IBV_QPS_ERR: return IB_QPS_ERROR; + default: return IB_QPS_TIME_WAIT; + }; +} + +uint32_t +mlx4_nd_get_qp_state ( + IN const ib_qp_handle_t h_uvp_qp ) +{ + struct ibv_qp *ibv_qp = (struct ibv_qp *)h_uvp_qp; + + return __from_qp_state(ibv_qp->state); +} + +static uint8_t +__gid_to_index_lookup ( + IN ib_ca_attr_t *p_ca_attr, + IN uint8_t port_num, + IN uint8_t *raw_gid ) +{ + ib_gid_t *p_gid_table = NULL; + uint8_t i, index = 0; + uint16_t num_gids; + + p_gid_table = 
p_ca_attr->p_port_attr[port_num-1].p_gid_table; + CL_ASSERT (p_gid_table); + + num_gids = p_ca_attr->p_port_attr[port_num-1].num_gids; + + for (i = 0; i < num_gids; i++) + { + if (cl_memcmp (raw_gid, p_gid_table[i].raw, 16)) + { + index = i; + break; + } + } + return index; +} + +static enum ibv_rate __to_rate(uint8_t rate) +{ + if (rate == IB_PATH_RECORD_RATE_2_5_GBS) return IBV_RATE_2_5_GBPS; + if (rate == IB_PATH_RECORD_RATE_5_GBS) return IBV_RATE_5_GBPS; + if (rate == IB_PATH_RECORD_RATE_10_GBS) return IBV_RATE_10_GBPS; + if (rate == IB_PATH_RECORD_RATE_20_GBS) return IBV_RATE_20_GBPS; + if (rate == IB_PATH_RECORD_RATE_30_GBS) return IBV_RATE_30_GBPS; + if (rate == IB_PATH_RECORD_RATE_40_GBS) return IBV_RATE_40_GBPS; + if (rate == IB_PATH_RECORD_RATE_60_GBS) return IBV_RATE_60_GBPS; + if (rate == IB_PATH_RECORD_RATE_80_GBS) return IBV_RATE_80_GBPS; + if (rate == IB_PATH_RECORD_RATE_120_GBS) return IBV_RATE_120_GBPS; + return IBV_RATE_MAX; +} + +static void +__to_ah ( + IN ib_ca_attr_t *p_ca_attr, + IN const ib_av_attr_t *p_av_attr, + OUT struct ibv_ah_attr *p_attr ) +{ + p_attr->port_num = p_av_attr->port_num; + p_attr->sl = p_av_attr->sl; + p_attr->dlid = cl_ntoh16 (p_av_attr->dlid); + p_attr->static_rate = __to_rate(p_av_attr->static_rate); + p_attr->src_path_bits = p_av_attr->path_bits; + + /* For global destination or Multicast address:*/ + if (p_av_attr->grh_valid) + { + p_attr->is_global = TRUE; + p_attr->grh.hop_limit = p_av_attr->grh.hop_limit; + ib_grh_get_ver_class_flow( p_av_attr->grh.ver_class_flow, NULL, + &p_attr->grh.traffic_class, &p_attr->grh.flow_label ); + p_attr->grh.sgid_index = __gid_to_index_lookup (p_ca_attr, p_av_attr->port_num, + (uint8_t *) p_av_attr->grh.src_gid.raw); + cl_memcpy (p_attr->grh.dgid.raw, p_av_attr->grh.dest_gid.raw, 16); + } + else + { + p_attr->is_global = FALSE; + } +} + +static void +__set_av_params(struct mlx4_ah *ah, struct ibv_pd *pd, struct ibv_ah_attr *attr) +{ + ah->av.port_pd = cl_hton32(to_mpd(pd)->pdn | (attr->port_num << 24)); + ah->av.g_slid = attr->src_path_bits; + ah->av.dlid = cl_hton16(attr->dlid); + if (attr->static_rate) { + ah->av.stat_rate = (uint8_t)(attr->static_rate + MLX4_STAT_RATE_OFFSET); + /* XXX check rate cap? 
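+		 * (stat_rate carries the IB rate value biased by
+		 * MLX4_STAT_RATE_OFFSET; when no static_rate is given it is
+		 * left at 0, i.e. no explicit static rate.)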
*/ + } + ah->av.sl_tclass_flowlabel = cl_hton32(attr->sl << 28); + if (attr->is_global) + { + ah->av.g_slid |= 0x80; + ah->av.gid_index = attr->grh.sgid_index; + ah->av.hop_limit = attr->grh.hop_limit; + ah->av.sl_tclass_flowlabel |= + cl_hton32((attr->grh.traffic_class << 20) | + attr->grh.flow_label); + cl_memcpy(ah->av.dgid, attr->grh.dgid.raw, 16); + } +} + +ib_api_status_t +mlx4_pre_create_ah ( + IN const ib_pd_handle_t h_uvp_pd, + IN const ib_av_attr_t *p_av_attr, + IN OUT ci_umv_buf_t *p_umv_buf, + OUT ib_av_handle_t *ph_uvp_av ) +{ + struct mlx4_ah *ah; + struct ibv_ah_attr attr; + struct ibv_pd *pd = (struct ibv_pd *)h_uvp_pd; + ib_api_status_t status = IB_SUCCESS; + + UNREFERENCED_PARAMETER(p_umv_buf); + + if (pd->context->p_hca_attr == NULL) { + status = IB_ERROR; + goto end; + } + + ah = cl_malloc(sizeof *ah); + if (!ah) { + status = IB_INSUFFICIENT_MEMORY; + goto end; + } + + // sanity check + if (p_av_attr->port_num == 0 || + p_av_attr->port_num > pd->context->p_hca_attr->num_ports) + { + status = IB_INVALID_PORT; + goto end; + } + + // convert parameters + cl_memset(&attr, 0, sizeof(attr)); + __to_ah(pd->context->p_hca_attr, p_av_attr, &attr); + + ah->ibv_ah.pd = pd; + ah->ibv_ah.context = pd->context; + cl_memcpy(&ah->ibv_ah.av_attr, p_av_attr, sizeof (ib_av_attr_t)); + + cl_memset(&ah->av, 0, sizeof ah->av); + __set_av_params(ah, pd, &attr); + + *ph_uvp_av = (ib_av_handle_t)&ah->ibv_ah; + status = IB_VERBS_PROCESSING_DONE; + +end: + return status; +} + +ib_api_status_t +mlx4_pre_query_ah ( + IN const ib_av_handle_t h_uvp_av, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + UNREFERENCED_PARAMETER(h_uvp_av); + UNREFERENCED_PARAMETER(p_umv_buf); + + return IB_VERBS_PROCESSING_DONE; +} + +void +mlx4_post_query_ah ( + IN const ib_av_handle_t h_uvp_av, + IN ib_api_status_t ioctl_status, + IN OUT ib_av_attr_t *p_addr_vector, + IN OUT ib_pd_handle_t *ph_pd, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + struct ibv_ah *ah = (struct ibv_ah *)h_uvp_av; + + UNREFERENCED_PARAMETER(p_umv_buf); + + CL_ASSERT(h_uvp_av && p_addr_vector); + + if (ioctl_status == IB_SUCCESS) + { + cl_memcpy(p_addr_vector, &ah->av_attr, sizeof(ib_av_attr_t)); + if (ph_pd) + *ph_pd = (ib_pd_handle_t)ah->pd; + } +} + +ib_api_status_t +mlx4_pre_modify_ah ( + IN const ib_av_handle_t h_uvp_av, + IN const ib_av_attr_t *p_addr_vector, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + struct ibv_ah *ah = (struct ibv_ah *)h_uvp_av; + struct ibv_ah_attr attr; + + UNREFERENCED_PARAMETER(p_umv_buf); + + CL_ASSERT (h_uvp_av); + + __to_ah(ah->context->p_hca_attr, p_addr_vector, &attr); + __set_av_params(to_mah(ah), ah->pd, &attr); + cl_memcpy(&ah->av_attr, p_addr_vector, sizeof(ib_av_attr_t)); + + return IB_VERBS_PROCESSING_DONE; +} + +ib_api_status_t +mlx4_pre_destroy_ah ( + IN const ib_av_handle_t h_uvp_av ) +{ + struct ibv_ah *ah = (struct ibv_ah *)h_uvp_av; + + CL_ASSERT(ah); + + cl_free(to_mah(ah)); + + return IB_VERBS_PROCESSING_DONE; +} + +#ifdef HAVE_IBV_XRC_OPS +ib_api_status_t +mlx4_pre_create_xrc_srq ( + IN const ib_pd_handle_t h_uvp_pd, + IN const ib_xrcd_handle_t h_uvp_xrcd, + IN const ib_srq_attr_t *p_srq_attr, + IN OUT ci_umv_buf_t *p_umv_buf, + OUT ib_srq_handle_t *ph_uvp_srq ) +{ + struct mlx4_srq *srq; + struct ibv_create_srq *p_create_srq; + struct ibv_pd *pd = (struct ibv_pd *)h_uvp_pd; + struct ibv_xrc_domain *xrc_domain = (struct ibv_xrc_domain *)h_uvp_xrcd; + ib_api_status_t status = IB_SUCCESS; + size_t size = max( sizeof(struct ibv_create_srq), sizeof(struct ibv_create_srq_resp) ); + + CL_ASSERT(p_umv_buf); + + if( 
!p_umv_buf->p_inout_buf ) + { + p_umv_buf->p_inout_buf = cl_malloc( size ); + if( !p_umv_buf->p_inout_buf ) + { + status = IB_INSUFFICIENT_MEMORY; + goto err_memory; + } + } + p_umv_buf->input_size = sizeof(struct ibv_create_srq); + p_umv_buf->output_size = sizeof(struct ibv_create_srq_resp); + p_umv_buf->command = TRUE; + + p_create_srq = p_umv_buf->p_inout_buf; + + // Mlx4 code: + + /* Sanity check SRQ size before proceeding */ + if (p_srq_attr->max_wr > 1 << 16 || p_srq_attr->max_sge > 64) + { + status = IB_INVALID_PARAMETER; + goto err_params; + } + + srq = cl_malloc(sizeof *srq); + if (!srq) { + status = IB_INSUFFICIENT_MEMORY; + goto err_alloc_srq; + } + + if (cl_spinlock_init(&srq->lock)) { + status = IB_INSUFFICIENT_MEMORY; + goto err_lock; + } + + srq->ibv_srq.pd = pd; + srq->ibv_srq.context = pd->context; + + srq->max = __align_queue_size(p_srq_attr->max_wr + 1); + srq->max_gs = p_srq_attr->max_sge; + srq->counter = 0; + + if (mlx4_alloc_srq_buf(pd, (struct ibv_srq_attr *)p_srq_attr, srq)) + { + status = IB_INSUFFICIENT_MEMORY; + goto err_alloc_buf; + } + + srq->db = mlx4_alloc_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ); + if (!srq->db) + goto err_alloc_db; + + *srq->db = 0; + + // fill the parameters for ioctl + p_create_srq->buf_addr = (uintptr_t) srq->buf.buf; + p_create_srq->db_addr = (uintptr_t) srq->db; + p_create_srq->pd_handle = pd->handle; + p_create_srq->max_wr = p_srq_attr->max_wr; + p_create_srq->max_sge = p_srq_attr->max_sge; + p_create_srq->srq_limit = p_srq_attr->srq_limit; + + *ph_uvp_srq = (ib_srq_handle_t)&srq->ibv_srq; + goto end; + +err_alloc_db: + cl_free(srq->wrid); + mlx4_free_buf(&srq->buf); +err_alloc_buf: + cl_spinlock_destroy(&srq->lock); +err_lock: + cl_free(srq); +err_alloc_srq: + cl_free(p_umv_buf->p_inout_buf); +err_params: err_memory: +end: + return status; +} + +ib_api_status_t +mlx4_post_create_xrc_srq ( + IN const ib_pd_handle_t h_uvp_pd, + IN ib_api_status_t ioctl_status, + IN OUT ib_srq_handle_t *ph_uvp_srq, + IN ci_umv_buf_t *p_umv_buf ) +{ + struct mlx4_srq *srq = (struct mlx4_srq *)*ph_uvp_srq; + struct ibv_create_srq_resp *p_resp; + ib_api_status_t status = IB_SUCCESS; + + UNREFERENCED_PARAMETER(h_uvp_pd); + + CL_ASSERT(p_umv_buf && p_umv_buf->p_inout_buf); + + p_resp = p_umv_buf->p_inout_buf; + + if (IB_SUCCESS == ioctl_status) + { + // Mlx4 code: + + srq->ibv_srq.xrc_srq_num = srq->srqn = p_resp->srqn; + srq->ibv_srq.handle = p_resp->srq_handle; + + srq->max = p_resp->max_wr; + srq->max_gs = p_resp->max_sge; + + if (mlx4_store_xrc_srq(to_mctx(pd->context), srq->ibv_srq.xrc_srq_num, srq)) + { + mlx4_post_destroy_srq(*ph_uvp_srq, IB_SUCCESS); + status = IB_INSUFFICIENT_MEMORY; + } + } + else + { + mlx4_post_destroy_srq (*ph_uvp_srq, IB_SUCCESS); + } + + cl_free( p_resp ); + return status; +} + +ib_api_status_t +mlx4_pre_open_xrc_domain ( + IN const ib_ca_handle_t h_uvp_ca, + IN const uint32_t oflag, + IN OUT ci_umv_buf_t *p_umv_buf, + OUT ib_xrcd_handle_t *ph_uvp_xrcd ) +{ + struct mlx4_xrc_domain *xrcd; + struct ibv_context * context = (struct ibv_context *)h_uvp_ca; + struct ibv_open_xrc_domain *p_open_xrcd; + ib_api_status_t status = IB_SUCCESS; + int size = max( sizeof(struct ibv_open_xrc_domain), sizeof(struct ibv_open_xrc_domain_resp) ); + + CL_ASSERT(h_uvp_ca && p_umv_buf); + + if( !p_umv_buf->p_inout_buf ) + { + p_umv_buf->p_inout_buf = cl_malloc( size ); + if( !p_umv_buf->p_inout_buf ) + { + status = IB_INSUFFICIENT_MEMORY; + goto err_umv_buf; + } + } + p_umv_buf->input_size = sizeof(struct ibv_open_xrc_domain); + 
p_umv_buf->output_size = sizeof(struct ibv_open_xrc_domain_resp); + p_umv_buf->command = TRUE; + + p_open_xrcd = p_umv_buf->p_inout_buf; + + // Mlx4 code: + + xrcd = cl_malloc(sizeof *xrcd); + if (!xrcd) { + status = IB_INSUFFICIENT_MEMORY; + goto err_xrc; + } + + xrcd->ibv_xrcd.context = context; + + p_open_xrcd->oflags = oflag; + + *ph_uvp_xrcd = (struct ibv_xrc_domain *)&xrcd->ibv_xrcd; + goto end; + +err_xrc: + cl_free(p_umv_buf->p_inout_buf); +err_umv_buf: +end: + return status; +} + +void +mlx4_post_open_xrc_domain ( + IN const ib_ca_handle_t h_uvp_ca, + IN ib_api_status_t ioctl_status, + IN OUT ib_xrcd_handle_t *ph_uvp_xrcd, + IN ci_umv_buf_t *p_umv_buf ) +{ + struct ibv_xrc_domain *xrcd = (struct ibv_xrc_domain *)*ph_uvp_xrcd; + struct ibv_open_xrc_domain_resp *p_resp; + + UNREFERENCED_PARAMETER(h_uvp_ca); + + CL_ASSERT(p_umv_buf && p_umv_buf->p_inout_buf); + + p_resp = p_umv_buf->p_inout_buf; + + if (IB_SUCCESS == ioctl_status) + { + // Mlx4 code: + + xrcd->handle = p_resp->xrcd_handle; + to_mxrcd(xrcd)->xrcdn = p_resp->xrcdn; + } + else + { + cl_free(to_mxrcd(xrcd)); + } + + cl_free(p_resp); + return; +} + +void +mlx4_post_close_xrc_domain ( + IN const ib_xrcd_handle_t h_uvp_xrcd, + IN ib_api_status_t ioctl_status ) +{ + struct ibv_xrc_domain *xrdc = (struct ibv_xrc_domain *)h_uvp_xrcd; + + CL_ASSERT(xrdc); + + if (IB_SUCCESS == ioctl_status) { + cl_free(to_mxrcd(xrdc)); + } +} +#endif diff --git a/branches/ConnectX/hw/mlx4/user/verbs.h b/branches/ConnectX/hw/mlx4/user/verbs.h new file mode 100644 index 00000000..527b2e9b --- /dev/null +++ b/branches/ConnectX/hw/mlx4/user/verbs.h @@ -0,0 +1,446 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2005 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef INFINIBAND_VERBS_H +#define INFINIBAND_VERBS_H + +#include "l2w.h" + + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS + +union ibv_gid { + uint8_t raw[16]; + struct { + uint64_t subnet_prefix; + uint64_t interface_id; + } global; +}; + +enum ibv_rate { + IBV_RATE_MAX = 0, + IBV_RATE_2_5_GBPS = 2, + IBV_RATE_5_GBPS = 5, + IBV_RATE_10_GBPS = 3, + IBV_RATE_20_GBPS = 6, + IBV_RATE_30_GBPS = 4, + IBV_RATE_40_GBPS = 7, + IBV_RATE_60_GBPS = 8, + IBV_RATE_80_GBPS = 9, + IBV_RATE_120_GBPS = 10 +}; + +struct ibv_global_route { + union ibv_gid dgid; + uint32_t flow_label; + uint8_t sgid_index; + uint8_t hop_limit; + uint8_t traffic_class; +}; + +struct ibv_grh { + uint32_t version_tclass_flow; + uint16_t paylen; + uint8_t next_hdr; + uint8_t hop_limit; + union ibv_gid sgid; + union ibv_gid dgid; +}; + +struct ibv_ah_attr { + struct ibv_global_route grh; + uint16_t dlid; + uint8_t sl; + uint8_t src_path_bits; + uint8_t static_rate; + uint8_t is_global; + uint8_t port_num; +}; + +struct ibv_xrc_domain { + struct ibv_context *context; + uint64_t handle; +}; + +struct ibv_srq_attr { + uint32_t max_wr; + uint32_t max_sge; + uint32_t srq_limit; +}; + +enum ibv_qp_type { + IBV_QPT_RC = 2, + IBV_QPT_UC, + IBV_QPT_UD, + IBV_QPT_XRC +}; + +struct ibv_qp_cap { + uint32_t max_send_wr; + uint32_t max_recv_wr; + uint32_t max_send_sge; + uint32_t max_recv_sge; + uint32_t max_inline_data; +}; + +struct ibv_qp_init_attr { + void *qp_context; + struct ibv_cq *send_cq; + struct ibv_cq *recv_cq; + struct ibv_srq *srq; + struct ibv_qp_cap cap; + enum ibv_qp_type qp_type; + int sq_sig_all; + struct ibv_xrc_domain *xrc_domain; +}; + +enum ibv_qp_attr_mask { + IBV_QP_STATE = 1 << 0, + IBV_QP_CUR_STATE = 1 << 1, + IBV_QP_EN_SQD_ASYNC_NOTIFY = 1 << 2, + IBV_QP_ACCESS_FLAGS = 1 << 3, + IBV_QP_PKEY_INDEX = 1 << 4, + IBV_QP_PORT = 1 << 5, + IBV_QP_QKEY = 1 << 6, + IBV_QP_AV = 1 << 7, + IBV_QP_PATH_MTU = 1 << 8, + IBV_QP_TIMEOUT = 1 << 9, + IBV_QP_RETRY_CNT = 1 << 10, + IBV_QP_RNR_RETRY = 1 << 11, + IBV_QP_RQ_PSN = 1 << 12, + IBV_QP_MAX_QP_RD_ATOMIC = 1 << 13, + IBV_QP_ALT_PATH = 1 << 14, + IBV_QP_MIN_RNR_TIMER = 1 << 15, + IBV_QP_SQ_PSN = 1 << 16, + IBV_QP_MAX_DEST_RD_ATOMIC = 1 << 17, + IBV_QP_PATH_MIG_STATE = 1 << 18, + IBV_QP_CAP = 1 << 19, + IBV_QP_DEST_QPN = 1 << 20 +}; + +enum ibv_qp_state { + IBV_QPS_RESET, + IBV_QPS_INIT, + IBV_QPS_RTR, + IBV_QPS_RTS, + IBV_QPS_SQD, + IBV_QPS_SQE, + IBV_QPS_ERR +}; + +struct ibv_pd { + struct ibv_context *context; + uint64_t handle; +}; + +struct ibv_srq { + struct ibv_context *context; + struct ibv_pd *pd; + uint64_t handle; + uint32_t xrc_srq_num; + struct ibv_xrc_domain *xrc_domain; + struct ibv_cq *xrc_cq; +}; + +struct ibv_qp { + struct ibv_context *context; + struct ibv_pd *pd; + struct ibv_cq *send_cq; + struct ibv_cq *recv_cq; + struct ibv_srq *srq; + uint64_t handle; + uint32_t qp_num; + enum ibv_qp_state state; + enum ibv_qp_type qp_type; + struct ibv_xrc_domain *xrc_domain; +}; + +struct ibv_cq { + struct ibv_context *context; + uint64_t handle; + int cqe; +}; + +struct ibv_ah { + struct ibv_context *context; + struct ibv_pd *pd; + ib_av_attr_t av_attr; +}; + +struct ibv_context { + ib_ca_attr_t *p_hca_attr; + int page_size; + pthread_mutex_t mutex; +}; + +//#define HAVE_IBV_XRC_OPS + +/************* CA operations *************************/ +ib_api_status_t +mlx4_pre_open_ca ( + IN const ib_net64_t ca_guid, + IN 
OUT ci_umv_buf_t *p_umv_buf, + OUT ib_ca_handle_t *ph_uvp_ca ); + +ib_api_status_t +mlx4_post_open_ca ( + IN const ib_net64_t ca_guid, + IN ib_api_status_t ioctl_status, + IN OUT ib_ca_handle_t *ph_uvp_ca, + IN ci_umv_buf_t *p_umv_buf ); + +ib_api_status_t +mlx4_pre_query_ca ( + IN ib_ca_handle_t h_uvp_ca, + IN ib_ca_attr_t *p_ca_attr, + IN size_t byte_count, + IN ci_umv_buf_t *p_umv_buf ); + +void +mlx4_post_query_ca ( + IN ib_ca_handle_t h_uvp_ca, + IN ib_api_status_t ioctl_status, + IN ib_ca_attr_t *p_ca_attr, + IN size_t byte_count, + IN ci_umv_buf_t *p_umv_buf ); + +ib_api_status_t +mlx4_post_close_ca ( + IN ib_ca_handle_t h_uvp_ca, + IN ib_api_status_t ioctl_status ); + +/************* PD Management ***********************/ +extern ib_api_status_t +mlx4_pre_alloc_pd ( + IN const ib_ca_handle_t h_uvp_ca, + IN OUT ci_umv_buf_t *p_umv_buf, + OUT ib_pd_handle_t *ph_uvp_pd ); + +void +mlx4_post_alloc_pd ( + IN ib_ca_handle_t h_uvp_ca, + IN ib_api_status_t ioctl_status, + IN OUT ib_pd_handle_t *ph_uvp_pd, + IN ci_umv_buf_t *p_umv_buf ); + +void +mlx4_post_free_pd ( + IN const ib_pd_handle_t h_uvp_pd, + IN ib_api_status_t ioctl_status ); + +/************* CQ Management ***********************/ +ib_api_status_t +mlx4_pre_create_cq ( + IN const ib_ca_handle_t h_uvp_ca, + IN OUT uint32_t* const p_size, + IN OUT ci_umv_buf_t *p_umv_buf, + OUT ib_cq_handle_t *ph_uvp_cq ); + +void +mlx4_post_create_cq ( + IN const ib_ca_handle_t h_uvp_ca, + IN ib_api_status_t ioctl_status, + IN const uint32_t size, + IN OUT ib_cq_handle_t *ph_uvp_cq, + IN ci_umv_buf_t *p_umv_buf ); + +ib_api_status_t +mlx4_pre_query_cq ( + IN const ib_cq_handle_t h_uvp_cq, + OUT uint32_t* const p_size, + IN OUT ci_umv_buf_t *p_umv_buf ); + +void +mlx4_post_destroy_cq ( + IN const ib_cq_handle_t h_uvp_cq, + IN ib_api_status_t ioctl_status ); + +/************* SRQ Management **********************/ +ib_api_status_t +mlx4_pre_create_srq ( + IN const ib_pd_handle_t h_uvp_pd, + IN const ib_srq_attr_t *p_srq_attr, + IN OUT ci_umv_buf_t *p_umv_buf, + OUT ib_srq_handle_t *ph_uvp_srq ); + +void +mlx4_post_create_srq ( + IN const ib_pd_handle_t h_uvp_pd, + IN ib_api_status_t ioctl_status, + IN OUT ib_srq_handle_t *ph_uvp_srq, + IN ci_umv_buf_t *p_umv_buf ); + +ib_api_status_t +mlx4_pre_destroy_srq ( + IN const ib_srq_handle_t h_uvp_srq ); + +void +mlx4_post_destroy_srq ( + IN const ib_srq_handle_t h_uvp_srq, + IN ib_api_status_t ioctl_status ); + +/************* QP Management ***********************/ +ib_api_status_t +mlx4_pre_create_qp ( + IN const ib_pd_handle_t h_uvp_pd, + IN const ib_qp_create_t *p_create_attr, + IN OUT ci_umv_buf_t *p_umv_buf, + OUT ib_qp_handle_t *ph_uvp_qp ); + +ib_api_status_t +mlx4_post_create_qp ( + IN const ib_pd_handle_t h_uvp_pd, + IN ib_api_status_t ioctl_status, + IN OUT ib_qp_handle_t *ph_uvp_qp, + IN ci_umv_buf_t *p_umv_buf ); + +ib_api_status_t +mlx4_pre_modify_qp ( + IN const ib_qp_handle_t h_uvp_qp, + IN const ib_qp_mod_t *p_modify_attr, + IN OUT ci_umv_buf_t *p_umv_buf ); + +void +mlx4_post_modify_qp ( + IN const ib_qp_handle_t h_uvp_qp, + IN ib_api_status_t ioctl_status, + IN OUT ci_umv_buf_t *p_umv_buf ); + +void +mlx4_post_query_qp ( + IN ib_qp_handle_t h_uvp_qp, + IN ib_api_status_t ioctl_status, + IN OUT ib_qp_attr_t *p_query_attr, + IN OUT ci_umv_buf_t *p_umv_buf ); + +ib_api_status_t +mlx4_pre_destroy_qp ( + IN const ib_qp_handle_t h_uvp_qp ); + +void +mlx4_post_destroy_qp ( + IN const ib_qp_handle_t h_uvp_qp, + IN ib_api_status_t ioctl_status ); + +void +mlx4_nd_modify_qp ( + IN const 
ib_qp_handle_t h_uvp_qp, + OUT void** pp_outbuf, + OUT DWORD* p_size ); + +uint32_t +mlx4_nd_get_qp_state ( + IN const ib_qp_handle_t h_uvp_qp ); + +/************* AV Management ***********************/ +ib_api_status_t +mlx4_pre_create_ah ( + IN const ib_pd_handle_t h_uvp_pd, + IN const ib_av_attr_t *p_av_attr, + IN OUT ci_umv_buf_t *p_umv_buf, + OUT ib_av_handle_t *ph_uvp_av ); + +ib_api_status_t +mlx4_pre_query_ah ( + IN const ib_av_handle_t h_uvp_av, + IN OUT ci_umv_buf_t *p_umv_buf ); + +void +mlx4_post_query_ah ( + IN const ib_av_handle_t h_uvp_av, + IN ib_api_status_t ioctl_status, + IN OUT ib_av_attr_t *p_addr_vector, + IN OUT ib_pd_handle_t *ph_pd, + IN OUT ci_umv_buf_t *p_umv_buf ); + +ib_api_status_t +mlx4_pre_modify_ah ( + IN const ib_av_handle_t h_uvp_av, + IN const ib_av_attr_t *p_addr_vector, + IN OUT ci_umv_buf_t *p_umv_buf ); + +ib_api_status_t +mlx4_pre_destroy_ah ( + IN const ib_av_handle_t h_uvp_av ); + +#ifdef HAVE_IBV_XRC_OPS +/************* XRC Management **********************/ +ib_api_status_t +mlx4_pre_create_xrc_srq ( + IN const ib_pd_handle_t h_uvp_pd, + IN const ib_xrcd_handle_t h_uvp_xrcd, + IN const ib_srq_attr_t *p_srq_attr, + IN OUT ci_umv_buf_t *p_umv_buf, + OUT ib_srq_handle_t *ph_uvp_srq ); + +ib_api_status_t +mlx4_post_create_xrc_srq ( + IN const ib_pd_handle_t h_uvp_pd, + IN ib_api_status_t ioctl_status, + IN OUT ib_srq_handle_t *ph_uvp_srq, + IN ci_umv_buf_t *p_umv_buf ); + +ib_api_status_t +mlx4_pre_open_xrc_domain ( + IN const ib_ca_handle_t h_uvp_ca, + IN const uint32_t oflag, + IN OUT ci_umv_buf_t *p_umv_buf, + OUT ib_xrcd_handle_t *ph_uvp_xrcd ); + +void +mlx4_post_open_xrc_domain ( + IN const ib_ca_handle_t h_uvp_ca, + IN ib_api_status_t ioctl_status, + IN OUT ib_xrcd_handle_t *ph_uvp_xrcd, + IN ci_umv_buf_t *p_umv_buf ); + +void +mlx4_post_close_xrc_domain ( + IN const ib_xrcd_handle_t h_uvp_xrcd, + IN ib_api_status_t ioctl_status ); + +#endif /* HAVE_IBV_XRC_OPS */ + +END_C_DECLS + +#endif /* INFINIBAND_VERBS_H */ diff --git a/branches/ConnectX/hw/mlx4/user/wqe.h b/branches/ConnectX/hw/mlx4/user/wqe.h new file mode 100644 index 00000000..fa2f8ac6 --- /dev/null +++ b/branches/ConnectX/hw/mlx4/user/wqe.h @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2007 Cisco, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef WQE_H +#define WQE_H + +enum { + MLX4_SEND_DOORBELL = 0x14, +}; + +enum { + MLX4_WQE_CTRL_FENCE = 1 << 6, + MLX4_WQE_CTRL_CQ_UPDATE = 3 << 2, + MLX4_WQE_CTRL_SOLICIT = 1 << 1, +}; + +enum { + MLX4_INLINE_SEG = 1 << 31, + MLX4_INLINE_ALIGN = 64, +}; + +enum { + MLX4_INVALID_LKEY = 0x100, +}; + +struct mlx4_wqe_ctrl_seg { + uint32_t owner_opcode; + uint8_t reserved[3]; + uint8_t fence_size; + /* + * High 24 bits are SRC remote buffer; low 8 bits are flags: + * [7] SO (strong ordering) + * [5] TCP/UDP checksum + * [4] IP checksum + * [3:2] C (generate completion queue entry) + * [1] SE (solicited event) + * [0] FL (force loopback) + */ + uint32_t xrcrb_flags; + /* + * imm is immediate data for send/RDMA write w/ immediate; + * also invalidation key for send with invalidate; input + * modifier for WQEs on CCQs. + */ + uint32_t imm; +}; + +struct mlx4_wqe_datagram_seg { + uint32_t av[8]; + uint32_t dqpn; + uint32_t qkey; + uint32_t reserved[2]; +}; + +struct mlx4_wqe_data_seg { + uint32_t byte_count; + uint32_t lkey; + uint64_t addr; +}; + +struct mlx4_wqe_inline_seg { + uint32_t byte_count; +}; + +struct mlx4_wqe_srq_next_seg { + uint16_t reserved1; + uint16_t next_wqe_index; + uint32_t reserved2[3]; +}; + +struct mlx4_wqe_raddr_seg { + uint64_t raddr; + uint32_t rkey; + uint32_t reserved; +}; + +struct mlx4_wqe_atomic_seg { + uint64_t swap_add; + uint64_t compare; +}; + +struct mlx4_wqe_bind_seg { + uint32_t flags1; + uint32_t flags2; + uint32_t new_rkey; + uint32_t lkey; + uint64_t addr; + uint64_t length; +}; + +#endif /* WQE_H */ -- 2.41.0
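For readers skimming the new wqe.h above: a minimal sketch of how its segment layouts are meant to be stacked inside one send WQE, here for an RDMA write (control segment, then remote-address segment, then a single data segment). This is illustrative only and not part of the patch: the helper name build_rdma_write_wqe, its parameters, and the cl_hton32()/cl_hton64() byte-order helpers (assumed to come from complib, as cl_malloc/cl_free do elsewhere in this patch) are assumptions; ownership bits, opcode encoding, and ringing MLX4_SEND_DOORBELL are omitted.

/*
 * Sketch only -- not part of the patch. Shows how the wqe.h segments
 * compose a single RDMA-write WQE:  ctrl seg | raddr seg | data seg.
 * 'wqe' is assumed to point at a properly aligned slot in the SQ buffer.
 */
#include <complib/cl_byteswap.h>   /* cl_hton32()/cl_hton64(); include path assumed */
#include "wqe.h"

static void build_rdma_write_wqe(void *wqe, uint64_t remote_va, uint32_t rkey,
                                 uint64_t local_va, uint32_t lkey, uint32_t len)
{
	struct mlx4_wqe_ctrl_seg  *ctrl  = wqe;
	struct mlx4_wqe_raddr_seg *raddr = (void *)(ctrl + 1);
	struct mlx4_wqe_data_seg  *data  = (void *)(raddr + 1);
	int size_in_16 = (int)((sizeof *ctrl + sizeof *raddr + sizeof *data) / 16);

	/* remote buffer description (all fields are big-endian on the wire) */
	raddr->raddr    = cl_hton64(remote_va);
	raddr->rkey     = cl_hton32(rkey);
	raddr->reserved = 0;

	/* local scatter/gather entry */
	data->byte_count = cl_hton32(len);
	data->lkey       = cl_hton32(lkey);
	data->addr       = cl_hton64(local_va);

	/* request a CQE for this WQE; other xrcrb_flags bits stay clear */
	ctrl->xrcrb_flags = cl_hton32(MLX4_WQE_CTRL_CQ_UPDATE);
	ctrl->imm         = 0;
	/* low bits of fence_size carry the WQE size in 16-byte chunks,
	 * hence the division by 16 above */
	ctrl->fence_size  = (uint8_t)size_in_16;
	/* owner_opcode (opcode plus ownership bit) must be written last,
	 * followed by the SQ doorbell -- both omitted in this sketch */
}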