From d049a1279b829c61576c0d17a6b29489ad5e9383 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 9 Apr 2007 00:49:42 -0700 Subject: [PATCH] Initial import of libmlx4 repository Signed-off-by: Roland Dreier --- .gitignore | 17 ++ AUTHORS | 1 + COPYING | 378 ++++++++++++++++++++++++ Makefile.am | 26 ++ README | 58 ++++ autogen.sh | 8 + config/.gitignore | 8 + configure.in | 76 +++++ debian/changelog | 5 + debian/compat | 1 + debian/control | 52 ++++ debian/copyright | 43 +++ debian/libmlx4-1.install | 1 + debian/libmlx4-dev.install | 1 + debian/rules | 8 + libmlx4.spec.in | 55 ++++ mlx4.driver | 1 + src/.gitignore | 3 + src/ah.c | 60 ++++ src/buf.c | 82 ++++++ src/cq.c | 342 ++++++++++++++++++++++ src/dbrec.c | 125 ++++++++ src/doorbell.h | 63 ++++ src/mlx4-abi.h | 87 ++++++ src/mlx4.c | 262 +++++++++++++++++ src/mlx4.h | 338 +++++++++++++++++++++ src/mlx4.map | 6 + src/qp.c | 442 ++++++++++++++++++++++++++++ src/srq.c | 163 +++++++++++ src/verbs.c | 580 +++++++++++++++++++++++++++++++++++++ src/wqe.h | 120 ++++++++ 31 files changed, 3412 insertions(+) create mode 100644 .gitignore create mode 100644 AUTHORS create mode 100644 COPYING create mode 100644 Makefile.am create mode 100644 README create mode 100755 autogen.sh create mode 100644 config/.gitignore create mode 100644 configure.in create mode 100644 debian/changelog create mode 100644 debian/compat create mode 100644 debian/control create mode 100644 debian/copyright create mode 100644 debian/libmlx4-1.install create mode 100644 debian/libmlx4-dev.install create mode 100755 debian/rules create mode 100644 libmlx4.spec.in create mode 100644 mlx4.driver create mode 100644 src/.gitignore create mode 100644 src/ah.c create mode 100644 src/buf.c create mode 100644 src/cq.c create mode 100644 src/dbrec.c create mode 100644 src/doorbell.h create mode 100644 src/mlx4-abi.h create mode 100644 src/mlx4.c create mode 100644 src/mlx4.h create mode 100644 src/mlx4.map create mode 100644 src/qp.c create mode 100644 
src/srq.c create mode 100644 src/verbs.c create mode 100644 src/wqe.h diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4c45b09 --- /dev/null +++ b/.gitignore @@ -0,0 +1,17 @@ +*.o +*.lo +configure +Makefile.in +autom4te.cache +aclocal.m4 +stamp-h.in +config.h.in +config.log +config.h +.libs +.deps +libmlx4.spec +Makefile +config.status +stamp-h1 +libtool diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..ffe1800 --- /dev/null +++ b/AUTHORS @@ -0,0 +1 @@ +Roland Dreier diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..add3d19 --- /dev/null +++ b/COPYING @@ -0,0 +1,378 @@ +This software is available to you under a choice of one of two +licenses. You may choose to be licensed under the terms of the +OpenIB.org BSD license or the GNU General Public License (GPL) Version +2, both included below. + +Copyright (c) 2007 Cisco, Inc. All rights reserved. + +================================================================== + + OpenIB.org BSD license + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE +COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +================================================================== + + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. 
+ + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. 
The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. 
+ + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. 
+ +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version.
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License.
diff --git a/Makefile.am b/Makefile.am new file mode 100644 index 0000000..a7afb14 --- /dev/null +++ b/Makefile.am @@ -0,0 +1,26 @@ +AM_CFLAGS = -g -Wall -D_GNU_SOURCE + +mlx4_version_script = @MLX4_VERSION_SCRIPT@ + +MLX4_SOURCES = src/buf.c src/cq.c src/dbrec.c src/mlx4.c src/qp.c \ + src/srq.c src/verbs.c + +if HAVE_IBV_DEVICE_LIBRARY_EXTENSION + lib_LTLIBRARIES = src/libmlx4.la + src_libmlx4_la_SOURCES = $(MLX4_SOURCES) + src_libmlx4_la_LDFLAGS = -avoid-version -release @IBV_DEVICE_LIBRARY_EXTENSION@ \ + $(mlx4_version_script) + mlx4confdir = $(sysconfdir)/libibverbs.d + mlx4conf_DATA = mlx4.driver +else + mlx4libdir = $(libdir)/infiniband + mlx4lib_LTLIBRARIES = src/mlx4.la + src_mlx4_la_SOURCES = $(MLX4_SOURCES) + src_mlx4_la_LDFLAGS = -avoid-version -module $(mlx4_version_script) +endif + +EXTRA_DIST = src/doorbell.h src/mlx4.h src/mlx4-abi.h src/wqe.h \ + src/mlx4.map libmlx4.spec.in mlx4.driver + +dist-hook: libmlx4.spec + cp libmlx4.spec $(distdir) diff --git a/README b/README new file mode 100644 index 0000000..b66909c --- /dev/null +++ b/README @@ -0,0 +1,58 @@ +Introduction +============ + +libmlx4 is a userspace driver for Mellanox ConnectX InfiniBand HCAs. +It works as a plug-in module for libibverbs that allows programs to +use Mellanox hardware directly from userspace. See the libibverbs +package for more information. + +Using libmlx4 +============== + +libmlx4 will be loaded and used automatically by programs linked with +libibverbs. The mlx4_ib kernel module must be loaded for HCA devices +to be detected and used. + +Supported Hardware +================== + +libmlx4 currently supports HCAs based on the following Mellanox chip: + + MT25408 ConnectX (PCI Express) + +These HCAs use the mlx4_ib kernel driver. Support for other Mellanox +HCAs, which use the ib_mthca kernel driver, is provided by the +libmthca userspace driver.
+ +Valgrind Support +================ + +When running applications that use libibverbs under the Valgrind +memory-checking debugger, Valgrind will falsely report "read from +uninitialized" for memory that was initialized by the kernel drivers +or HCA hardware. Specifically, Valgrind cannot see when kernel +drivers or HCA hardware write to userspace memory, so when the process +reads from that memory, Valgrind incorrectly assumes that the memory +contents are uninitialized, and therefore raises a warning. + +libmlx4 can be built with specific support for the Valgrind +memory-checking debugger by specifying the --with-valgrind command +line argument to configure. This flag enables code in libmlx4 to +tell Valgrind "this memory may look uninitialized, but it's really +OK," which therefore suppresses the incorrect "read from +uninitialized" warnings. This code adds trivial overhead to the +critical performance path, so it is disabled by default. The intent +is that production users can use a "normal" build of libmlx4 and +developers can use the "valgrind debug" build by simply switching +their OPENIB_DRIVER_PATH environment variable. + +libmlx4 needs some header files from Valgrind in order to compile this +support; it is important to use the header files from the same version +of Valgrind that will be used at run time. You may need to specify +the directory where Valgrind's header files are installed as an +argument to --with-valgrind. For example: + + ./configure --with-valgrind=/opt/valgrind + +will make the libmlx4 build look for Valgrind headers in +/opt/valgrind/include diff --git a/autogen.sh b/autogen.sh new file mode 100755 index 0000000..fd47839 --- /dev/null +++ b/autogen.sh @@ -0,0 +1,8 @@ +#!
/bin/sh + +set -x +aclocal -I config +libtoolize --force --copy +autoheader +automake --foreign --add-missing --copy +autoconf diff --git a/config/.gitignore b/config/.gitignore new file mode 100644 index 0000000..4d4c7b1 --- /dev/null +++ b/config/.gitignore @@ -0,0 +1,8 @@ +mkinstalldirs +depcomp +compile +missing +config.guess +config.sub +ltmain.sh +install-sh diff --git a/configure.in b/configure.in new file mode 100644 index 0000000..b7d274b --- /dev/null +++ b/configure.in @@ -0,0 +1,76 @@ +dnl Process this file with autoconf to produce a configure script. + +AC_PREREQ(2.57) +AC_INIT(libmlx4, 0.1, general@lists.openfabrics.org) +AC_CONFIG_SRCDIR([src/mlx4.h]) +AC_CONFIG_AUX_DIR(config) +AM_CONFIG_HEADER(config.h) +AM_INIT_AUTOMAKE(libmlx4, 0.1) +AM_PROG_LIBTOOL + +AC_ARG_WITH([valgrind], + AC_HELP_STRING([--with-valgrind], + [Enable Valgrind annotations (small runtime overhead, default NO)])) +if test x$with_valgrind = x || test x$with_valgrind = xno; then + want_valgrind=no + AC_DEFINE([NVALGRIND], 1, [disable Valgrind annotations]) +else + want_valgrind=yes + if test -d $with_valgrind; then + CPPFLAGS="$CPPFLAGS -I$with_valgrind/include" + fi +fi + +dnl Checks for programs +AC_PROG_CC + +dnl Checks for libraries +AC_CHECK_LIB(ibverbs, ibv_get_device_list, [], + AC_MSG_ERROR([ibv_get_device_list() not found. libmlx4 requires libibverbs.])) + +dnl Checks for header files. +AC_CHECK_HEADER(infiniband/driver.h, [], + AC_MSG_ERROR([<infiniband/driver.h> not found. libmlx4 requires libibverbs.])) +AC_HEADER_STDC +AC_CHECK_HEADER(valgrind/memcheck.h, memcheck_ok=yes, memcheck_ok=no) + +if test $want_valgrind = yes && test $memcheck_ok = no; then + AC_MSG_ERROR([Valgrind memcheck support requested, but not found.]) +fi + +dnl Checks for typedefs, structures, and compiler characteristics.
+AC_C_CONST +AC_CHECK_SIZEOF(long) + +dnl Checks for library functions +AC_CHECK_FUNCS(ibv_read_sysfs_file ibv_dontfork_range ibv_dofork_range \ + ibv_register_driver) + +dnl Now check for libibverbs 1.0 vs 1.1 +dummy=if$$ +cat <<IBV_VERSION > $dummy.c +#include <infiniband/driver.h> +IBV_DEVICE_LIBRARY_EXTENSION +IBV_VERSION +IBV_DEVICE_LIBRARY_EXTENSION=`$CC $CPPFLAGS -E $dummy.c 2> /dev/null | tail -1` +rm -f $dummy.c +AM_CONDITIONAL(HAVE_IBV_DEVICE_LIBRARY_EXTENSION, + test $IBV_DEVICE_LIBRARY_EXTENSION != IBV_DEVICE_LIBRARY_EXTENSION) +AC_SUBST(IBV_DEVICE_LIBRARY_EXTENSION) + +AC_CACHE_CHECK(whether ld accepts --version-script, ac_cv_version_script, + [if test -n "`$LD --help < /dev/null 2>/dev/null | grep version-script`"; then + ac_cv_version_script=yes + else + ac_cv_version_script=no + fi]) + +if test $ac_cv_version_script = yes; then + MLX4_VERSION_SCRIPT='-Wl,--version-script=$(srcdir)/src/mlx4.map' +else + MLX4_VERSION_SCRIPT= +fi +AC_SUBST(MLX4_VERSION_SCRIPT) + +AC_CONFIG_FILES([Makefile libmlx4.spec]) +AC_OUTPUT diff --git a/debian/changelog b/debian/changelog new file mode 100644 index 0000000..ba2961f --- /dev/null +++ b/debian/changelog @@ -0,0 +1,5 @@ +libmlx4 (0.1-1) unstable; urgency=low + + * Initial release.
+ + -- Roland Dreier Fri, 6 Apr 2007 10:04:57 -0700 diff --git a/debian/compat b/debian/compat new file mode 100644 index 0000000..7ed6ff8 --- /dev/null +++ b/debian/compat @@ -0,0 +1 @@ +5 diff --git a/debian/control b/debian/control new file mode 100644 index 0000000..356a47a --- /dev/null +++ b/debian/control @@ -0,0 +1,52 @@ +Source: libmlx4 +Priority: extra +Maintainer: Roland Dreier +Build-Depends: cdbs (>= 0.4.25-1), debhelper (>= 5), libibverbs-dev (>= 1.0), autotools-dev +Standards-Version: 3.7.2 +Section: libs + +Package: libmlx4-1 +Section: libs +Architecture: any +Depends: ${shlibs:Depends}, ${misc:Depends} +Description: A userspace driver for Mellanox ConnectX InfiniBand HCAs + libmlx4 is a device-specific driver for Mellanox ConnectX InfiniBand + host channel adapters (HCAs) for the libibverbs library. This allows + userspace processes to access Mellanox HCA hardware directly with + low latency and low overhead. + . + This package contains the loadable plug-in. + . + Homepage: http://www.openfabrics.org/ + +Package: libmlx4-dev +Section: libdevel +Architecture: any +Depends: ${misc:Depends}, libmlx4-1 (= ${Source-Version}) +Description: Development files for the libmlx4 driver + libmlx4 is a device-specific driver for Mellanox ConnectX InfiniBand + host channel adapters (HCAs) for the libibverbs library. This allows + userspace processes to access Mellanox HCA hardware directly with + low latency and low overhead. + . + This package contains static versions of libmlx4 that may be linked + directly to an application, which may be useful for debugging. + . + Homepage: http://www.openfabrics.org/ + +Package: libmlx4-1-dbg +Section: libdevel +Priority: extra +Architecture: any +Depends: ${misc:Depends}, libmlx4-1 (= ${Source-Version}) +Description: Debugging symbols for the libmlx4 driver + libmlx4 is a device-specific driver for Mellanox ConnectX InfiniBand + host channel adapters (HCAs) for the libibverbs library. 
This allows + userspace processes to access Mellanox HCA hardware directly with + low latency and low overhead. + . + This package contains the debugging symbols associated with + libmlx4-1. They will automatically be used by gdb for debugging + libmlx4-related issues. + . + Homepage: http://www.openfabrics.org/ diff --git a/debian/copyright b/debian/copyright new file mode 100644 index 0000000..91942cc --- /dev/null +++ b/debian/copyright @@ -0,0 +1,43 @@ +Initial Debianization: +This package was debianized by Roland Dreier on +Fri, 6 Apr 2007 10:04:57 -0700 + +Source: +It was downloaded from the OpenFabrics web site at + + +Authors: + Roland Dreier + +Portions are copyrighted by: + * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved. + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. + +libmlx4 is licensed under a choice of one of two licenses. You may +choose to be licensed under the terms of the GNU General Public +License (GPL) Version 2, available from the file +/usr/share/common-licenses/GPL-2 on your Debian system, or the +OpenIB.org BSD license below: + + Redistribution and use in source and binary forms, with or + without modification, are permitted provided that the following + conditions are met: + + - Redistributions of source code must retain the above + copyright notice, this list of conditions and the following + disclaimer. + + - Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials + provided with the distribution. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/debian/libmlx4-1.install b/debian/libmlx4-1.install new file mode 100644 index 0000000..4fe7596 --- /dev/null +++ b/debian/libmlx4-1.install @@ -0,0 +1 @@ +usr/lib/infiniband/mlx4.so diff --git a/debian/libmlx4-dev.install b/debian/libmlx4-dev.install new file mode 100644 index 0000000..77ea9e1 --- /dev/null +++ b/debian/libmlx4-dev.install @@ -0,0 +1 @@ +usr/lib/infiniband/mlx4.{a,la} diff --git a/debian/rules b/debian/rules new file mode 100755 index 0000000..15721aa --- /dev/null +++ b/debian/rules @@ -0,0 +1,8 @@ +#!/usr/bin/make -f +# -*- mode: makefile; coding: utf-8 -*- + +DEB_DH_INSTALL_SOURCEDIR := debian/tmp +DEB_DH_STRIP_ARGS := --dbg-package=libmlx4-1-dbg + +include /usr/share/cdbs/1/rules/debhelper.mk +include /usr/share/cdbs/1/class/autotools.mk diff --git a/libmlx4.spec.in b/libmlx4.spec.in new file mode 100644 index 0000000..52d6c52 --- /dev/null +++ b/libmlx4.spec.in @@ -0,0 +1,55 @@ +Name: libmlx4 +Version: 0.1 +Release: 1%{?dist} +Summary: Mellanox InfiniBand HCA Userspace Driver + +Group: System Environment/Libraries +License: GPL/BSD +Url: http://openib.org/ +Source: http://openib.org/downloads/libmlx4-0.1.tar.gz +BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX) + +BuildRequires: libibverbs-devel >= 1.1-0.1.rc2 + +%description +libmlx4 provides a device-specific userspace driver for Mellanox +ConnectX HCAs for use with the libibverbs library. + +%package devel-static +Summary: Development files for the libmlx4 driver +Group: System Environment/Libraries +Requires: %{name} = %{version}-%{release} + +%description devel-static +Static version of libmlx4 that may be linked directly to an +application, which may be useful for debugging. 
+ +%prep +%setup -q -n %{name}-@VERSION@ + +%build +%configure +make %{?_smp_mflags} + +%install +rm -rf $RPM_BUILD_ROOT +make DESTDIR=%{buildroot} install +# remove unpackaged files from the buildroot +rm -f $RPM_BUILD_ROOT%{_libdir}/*.la $RPM_BUILD_ROOT%{_libdir}/libmlx4.so + +%clean +rm -rf $RPM_BUILD_ROOT + +%files +%defattr(-,root,root,-) +%{_libdir}/libmlx4-rdmav2.so +%{_sysconfdir}/libibverbs.d/mlx4.driver +%doc AUTHORS COPYING ChangeLog README + +%files devel-static +%defattr(-,root,root,-) +%{_libdir}/libmlx4.a + +%changelog +* Fri Apr 6 2007 Roland Dreier - 0.1-1 +- Initial Fedora spec file diff --git a/mlx4.driver b/mlx4.driver new file mode 100644 index 0000000..4d29fa8 --- /dev/null +++ b/mlx4.driver @@ -0,0 +1 @@ +driver mlx4 diff --git a/src/.gitignore b/src/.gitignore new file mode 100644 index 0000000..7297cbb --- /dev/null +++ b/src/.gitignore @@ -0,0 +1,3 @@ +*.la +.dirstamp +.libs diff --git a/src/ah.c b/src/ah.c new file mode 100644 index 0000000..dd4bf88 --- /dev/null +++ b/src/ah.c @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#if HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <stdlib.h> +#include <netinet/in.h> +#include <pthread.h> +#include <string.h> + +#include "mlx4.h" +/* Allocate and zero the address vector of ah; returns 0 on success, -1 if malloc() fails. pd and attr are not used yet. */ +int mlx4_alloc_av(struct mlx4_pd *pd, struct ibv_ah_attr *attr, + struct mlx4_ah *ah) +{ + ah->av = malloc(sizeof *ah->av); + if (!ah->av) + return -1; + + memset(ah->av, 0, sizeof *ah->av); + /* NOTE(review): struct mlx4_ah in mlx4.h (same patch) declares av as an embedded struct mlx4_av, not a pointer -- confirm which is intended. */ + + return 0; +} +/* Release the address vector allocated by mlx4_alloc_av(). */ +void mlx4_free_av(struct mlx4_ah *ah) +{ + free(ah->av); +} diff --git a/src/buf.c b/src/buf.c new file mode 100644 index 0000000..0e5f9b6 --- /dev/null +++ b/src/buf.c @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2006, 2007 Cisco, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#if HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <stdlib.h> + +#include "mlx4.h" + +#if !(defined(HAVE_IBV_DONTFORK_RANGE) && defined(HAVE_IBV_DOFORK_RANGE)) + +/* + * If libibverbs isn't exporting these functions, then there's no + * point in doing it here, because the rest of libibverbs isn't going + * to be fork-safe anyway. + */ +static int ibv_dontfork_range(void *base, size_t size) +{ + return 0; +} + +static int ibv_dofork_range(void *base, size_t size) +{ + return 0; +} + +#endif /* !(HAVE_IBV_DONTFORK_RANGE && HAVE_IBV_DOFORK_RANGE) */ +/* Allocate size bytes (rounded up to a multiple of page_size) aligned to page_size and pass the range to ibv_dontfork_range(); returns 0 or the failing call's error code. */ +int mlx4_alloc_buf(struct mlx4_buf *buf, size_t size, int page_size) +{ + int ret; + + ret = posix_memalign(&buf->buf, page_size, align(size, page_size)); + if (ret) + return ret; + + ret = ibv_dontfork_range(buf->buf, size); + if (ret) + free(buf->buf); /* buf->buf is left dangling; callers must not use buf after a failure */ + + if (!ret) + buf->length = size; + + return ret; +} +/* Undo mlx4_alloc_buf(): re-enable fork inheritance for the range, then free it. */ +void mlx4_free_buf(struct mlx4_buf *buf) +{ + ibv_dofork_range(buf->buf, buf->length); + free(buf->buf); +} diff --git a/src/cq.c b/src/cq.c new file mode 100644 index 0000000..aae5c37 --- /dev/null +++ b/src/cq.c @@ -0,0 +1,342 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2006, 2007 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#if HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <stdio.h> +#include <stdlib.h> +#include <pthread.h> +#include <netinet/in.h> +#include <string.h> + +#include <infiniband/opcode.h> + +#include "mlx4.h" +#include "doorbell.h" + +enum { + MLX4_CQ_DOORBELL = 0x20 +}; + +enum { + CQ_OK = 0, + CQ_EMPTY = -1, + CQ_POLL_ERR = -2 +}; + +#define MLX4_CQ_DB_REQ_NOT_SOL (1 << 24) +#define MLX4_CQ_DB_REQ_NOT (2 << 24) + +enum { + MLX4_CQE_OWNER_MASK = 0x80, + MLX4_CQE_IS_SEND_MASK = 0x40, + MLX4_CQE_OPCODE_MASK = 0x1f +}; + +enum { + SYNDROME_LOCAL_LENGTH_ERR = 0x01, + SYNDROME_LOCAL_QP_OP_ERR = 0x02, + SYNDROME_LOCAL_EEC_OP_ERR = 0x03, + SYNDROME_LOCAL_PROT_ERR = 0x04, + SYNDROME_WR_FLUSH_ERR = 0x05, + SYNDROME_MW_BIND_ERR = 0x06, + SYNDROME_BAD_RESP_ERR = 0x10, + SYNDROME_LOCAL_ACCESS_ERR = 0x11, + SYNDROME_REMOTE_INVAL_REQ_ERR = 0x12, + SYNDROME_REMOTE_ACCESS_ERR = 0x13, + SYNDROME_REMOTE_OP_ERR = 0x14, + SYNDROME_RETRY_EXC_ERR = 0x15, + SYNDROME_RNR_RETRY_EXC_ERR = 0x16, + SYNDROME_LOCAL_RDD_VIOL_ERR = 0x20, + SYNDROME_REMOTE_INVAL_RD_REQ_ERR = 0x21, + SYNDROME_REMOTE_ABORTED_ERR = 0x22, + SYNDROME_INVAL_EECN_ERR = 0x23, + SYNDROME_INVAL_EEC_STATE_ERR = 0x24 +}; + +struct mlx4_cqe { + uint32_t my_qpn; + uint32_t immed_rss_invalid; + uint32_t g_mlpath_rqpn; + uint8_t sl; + uint8_t reserved1; + uint16_t rlid; + uint32_t reserved2; + uint32_t byte_cnt; + uint16_t wqe_index; + uint16_t checksum; + uint8_t reserved3[3]; + uint8_t owner_sr_opcode; +}; + +struct mlx4_err_cqe { + uint32_t my_qpn; + uint32_t reserved1[5]; + uint16_t wqe_index; + uint8_t vendor_err; + uint8_t syndrome; + uint8_t reserved2[3]; + uint8_t owner_sr_opcode; +}; + +static inline struct mlx4_cqe *get_cqe(struct mlx4_cq *cq, int entry) +{ + return cq->buf.buf + entry * MLX4_CQ_ENTRY_SIZE; +} +/* Return the CQE at the consumer index, or NULL when its owner bit does not match the current pass over the ring. */ +static inline struct mlx4_cqe *next_cqe_sw(struct mlx4_cq *cq) +{ + struct mlx4_cqe *cqe = get_cqe(cq, cq->cons_index & cq->ibv_cq.cqe); + + return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^ + !!(cq->cons_index & (cq->ibv_cq.cqe + 1))) ? 
NULL : cqe; +} + +static void update_cons_index(struct mlx4_cq *cq) +{ + *cq->set_ci_db = htonl(cq->cons_index & 0xffffff); +} + +static int handle_error_cqe(struct mlx4_cq *cq, struct mlx4_qp *qp, + int wqe_index, int is_send, + struct mlx4_err_cqe *cqe, + struct ibv_wc *wc) +{ + /* XXX handle error CQE */ + return 0; +} +/* Consume one CQE into *wc, caching the owning QP in *cur_qp; returns CQ_OK, CQ_EMPTY, or CQ_POLL_ERR when the QP lookup fails. */ +static int mlx4_poll_one(struct mlx4_cq *cq, + struct mlx4_qp **cur_qp, + struct ibv_wc *wc) +{ + struct mlx4_wq *wq; + struct mlx4_cqe *cqe; + struct mlx4_srq *srq; + uint32_t qpn; + uint16_t wqe_index = 0; /* only assigned from the CQE for send and SRQ completions */ + int is_error; + int is_send; + int err = 0; + + cqe = next_cqe_sw(cq); + if (!cqe) + return CQ_EMPTY; + + ++cq->cons_index; + + VALGRIND_MAKE_MEM_DEFINED(cqe, sizeof *cqe); + + /* + * Make sure we read CQ entry contents after we've checked the + * ownership bit. + */ + rmb(); + + qpn = ntohl(cqe->my_qpn); + + is_send = cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK; + is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == + MLX4_CQE_OPCODE_ERROR; + + if (!*cur_qp || + (qpn & 0xffffff) != (*cur_qp)->ibv_qp.qp_num) { + /* + * We do not have to take the QP table lock here, + * because CQs will be locked while QPs are removed + * from the table. 
+ */ + *cur_qp = mlx4_find_qp(to_mctx(cq->ibv_cq.context), + qpn & 0xffffff); + if (!*cur_qp) + return CQ_POLL_ERR; + } + + wc->qp_num = (*cur_qp)->ibv_qp.qp_num; + + if (is_send) { + wq = &(*cur_qp)->sq; + wqe_index = ntohs(cqe->wqe_index); + wq->tail += wqe_index - (uint16_t) wq->tail; + wc->wr_id = wq->wrid[wq->tail & (wq->max - 1)]; + ++wq->tail; + } else if ((*cur_qp)->ibv_qp.srq) { + srq = to_msrq((*cur_qp)->ibv_qp.srq); + wqe_index = ntohs(cqe->wqe_index); /* network to host order, same as the send path */ + wc->wr_id = srq->wrid[wqe_index]; + mlx4_free_srq_wqe(srq, wqe_index); + } else { + wq = &(*cur_qp)->rq; + wc->wr_id = wq->wrid[wq->tail & (wq->max - 1)]; + ++wq->tail; + } + + if (is_error) { + err = handle_error_cqe(cq, *cur_qp, wqe_index, is_send, + (struct mlx4_err_cqe *) cqe, wc); + return err; + } + + wc->status = IBV_WC_SUCCESS; + + if (is_send) { + wc->wc_flags = 0; + switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) { + case MLX4_OPCODE_RDMA_WRITE_IMM: + wc->wc_flags |= IBV_WC_WITH_IMM; /* fall through */ + case MLX4_OPCODE_RDMA_WRITE: + wc->opcode = IBV_WC_RDMA_WRITE; + break; + case MLX4_OPCODE_SEND_IMM: + wc->wc_flags |= IBV_WC_WITH_IMM; /* fall through */ + case MLX4_OPCODE_SEND: + wc->opcode = IBV_WC_SEND; + break; + case MLX4_OPCODE_RDMA_READ: + wc->opcode = IBV_WC_RDMA_READ; + wc->byte_len = ntohl(cqe->byte_cnt); + break; + case MLX4_OPCODE_ATOMIC_CS: + wc->opcode = IBV_WC_COMP_SWAP; + /* XXX byte_len? */ + break; + case MLX4_OPCODE_ATOMIC_FA: + wc->opcode = IBV_WC_FETCH_ADD; + /* XXX byte_len? 
*/ + break; + case MLX4_OPCODE_BIND_MW: + wc->opcode = IBV_WC_BIND_MW; + break; + default: + /* assume it's a send completion */ + wc->opcode = IBV_WC_SEND; + break; + } + } else { + wc->byte_len = ntohl(cqe->byte_cnt); + wc->wc_flags = 0; /* defined starting value for the |= below, even for opcodes the switch does not handle */ + switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) { + case MLX4_RECV_OPCODE_RDMA_WRITE_IMM: + wc->opcode = IBV_WC_RECV_RDMA_WITH_IMM; + wc->wc_flags = IBV_WC_WITH_IMM; + wc->imm_data = cqe->immed_rss_invalid; + break; + case MLX4_RECV_OPCODE_SEND: + wc->opcode = IBV_WC_RECV; + wc->wc_flags = 0; + break; + case MLX4_RECV_OPCODE_SEND_IMM: + wc->opcode = IBV_WC_RECV; + wc->wc_flags = IBV_WC_WITH_IMM; + wc->imm_data = cqe->immed_rss_invalid; + break; + } + + wc->slid = ntohs(cqe->rlid); + wc->sl = cqe->sl >> 4; + wc->src_qp = ntohl(cqe->g_mlpath_rqpn) & 0xffffff; + wc->dlid_path_bits = (ntohl(cqe->g_mlpath_rqpn) >> 24) & 0x7f; + wc->pkey_index = ntohl(cqe->immed_rss_invalid) >> 16; + wc->wc_flags |= ntohl(cqe->g_mlpath_rqpn) & 0x80000000 ? + IBV_WC_GRH : 0; /* bit 31 of the 32-bit word: needs ntohl(), ntohs() can never yield it */ + } + + return 0; +} +/* Poll up to ne completions into wc[] while holding cq->lock; returns the number polled, or CQ_POLL_ERR. */ +int mlx4_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc) +{ + struct mlx4_cq *cq = to_mcq(ibcq); + struct mlx4_qp *qp = NULL; + int npolled; + int err = CQ_OK; + + pthread_spin_lock(&cq->lock); + + for (npolled = 0; npolled < ne; ++npolled) { + err = mlx4_poll_one(cq, &qp, wc + npolled); + if (err != CQ_OK) + break; + } + + if (npolled) + update_cons_index(cq); + + pthread_spin_unlock(&cq->lock); + + return err == CQ_POLL_ERR ? err : npolled; +} +/* Request a completion notification: update the arm doorbell record, then ring the CQ doorbell through the UAR; always returns 0. */ +int mlx4_arm_cq(struct ibv_cq *ibvcq, int solicited) +{ + struct mlx4_cq *cq = to_mcq(ibvcq); + uint32_t doorbell[2]; + uint32_t sn; + uint32_t ci; + uint32_t cmd; + + sn = cq->arm_sn & 3; + ci = cq->cons_index & 0xffffff; + cmd = solicited ? MLX4_CQ_DB_REQ_NOT_SOL : MLX4_CQ_DB_REQ_NOT; + + *cq->arm_db = htonl(sn << 28 | cmd | ci); + + /* + * Make sure that the doorbell record in host memory is + * written before ringing the doorbell via PCI MMIO. 
+ */ + wmb(); + + doorbell[0] = htonl(sn << 28 | cmd | cq->cqn); + doorbell[1] = htonl(ci); + + mlx4_write64(doorbell, to_mctx(ibvcq->context), MLX4_CQ_DOORBELL); + + return 0; +} + +void mlx4_cq_event(struct ibv_cq *cq) +{ + to_mcq(cq)->arm_sn++; +} + +void mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq) +{ +} + +void mlx4_cq_resize_copy_cqes(struct mlx4_cq *cq, void *buf, int old_cqe) +{ +} diff --git a/src/dbrec.c b/src/dbrec.c new file mode 100644 index 0000000..9cff0d8 --- /dev/null +++ b/src/dbrec.c @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#if HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <stdlib.h> +#include <netinet/in.h> +#include <pthread.h> +#include <string.h> + +#include "mlx4.h" + +struct mlx4_db_page { + struct mlx4_db_page *prev, *next; + struct mlx4_buf buf; + int num_db; + int use_cnt; + unsigned free[]; /* bitmap, one bit per doorbell record; a set bit means the record is free */ +}; + +static const int db_size[] = { + [MLX4_DB_TYPE_CQ] = 8, + [MLX4_DB_TYPE_RQ] = 4, +}; +/* Allocate a doorbell page for type, mark every record free and push it on the head of context->db_list[type]; returns NULL on allocation failure. */ +static struct mlx4_db_page *__add_page(struct mlx4_context *context, + enum mlx4_db_type type) +{ + struct mlx4_db_page *page; + int pp; + int i; + + pp = to_mdev(context->ibv_ctx.device)->page_size / db_size[type]; + + page = malloc(sizeof *page + pp / 8); + if (!page) + return NULL; + + if (mlx4_alloc_buf(&page->buf, to_mdev(context->ibv_ctx.device)->page_size, + to_mdev(context->ibv_ctx.device)->page_size)) { + free(page); + return NULL; + } + + page->num_db = pp; + page->use_cnt = 0; + for (i = 0; i < pp / (sizeof (int) * 8); ++i) + page->free[i] = ~0; + + page->prev = NULL; + page->next = context->db_list[type]; + context->db_list[type] = page; + if (page->next) + page->next->prev = page; + + return page; +} +/* Hand out one free doorbell record of the given type under db_list_mutex, adding a page when every existing one is full; returns NULL if no page can be allocated. */ +uint32_t *mlx4_alloc_db(struct mlx4_context *context, enum mlx4_db_type type) +{ + struct mlx4_db_page *page; + uint32_t *db = NULL; + int i, j; + + pthread_mutex_lock(&context->db_list_mutex); + + for (page = context->db_list[type]; page; page = page->next) + if (page->use_cnt < page->num_db) + goto found; + + page = __add_page(context, type); + if (!page) + goto out; + +found: + ++page->use_cnt; + + for (i = 0; !page->free[i]; ++i) + /* nothing */; + + j = ffs(page->free[i]); + page->free[i] &= ~(1U << (j - 1)); /* unsigned constant: j can be 32, and 1 << 31 overflows a signed int */ + db = page->buf.buf + (i * 8 * sizeof (int) + (j - 1)) * db_size[type]; + +out: + pthread_mutex_unlock(&context->db_list_mutex); + + return db; +} + +void mlx4_free_db(struct mlx4_context *context, uint32_t *db) +{ + /*XXX nothing for now*/ +} diff --git a/src/doorbell.h b/src/doorbell.h new file mode 100644 index 0000000..3171e76 --- /dev/null +++ b/src/doorbell.h @@ -0,0 +1,63 @@ +/* + * 
Copyright (c) 2007 Cisco, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef DOORBELL_H +#define DOORBELL_H + +#if SIZEOF_LONG == 8 /* 64-bit long: the doorbell is written with a single 64-bit store */ +/* Combine the two 32-bit words so that val[0] ends up at the lower address once stored. */ +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define MLX4_PAIR_TO_64(val) ((uint64_t) val[1] << 32 | val[0]) +#elif __BYTE_ORDER == __BIG_ENDIAN +# define MLX4_PAIR_TO_64(val) ((uint64_t) val[0] << 32 | val[1]) +#else +# error __BYTE_ORDER not defined +#endif +/* Write the doorbell pair val[0], val[1] to the UAR mapping at the given byte offset. */ +static inline void mlx4_write64(uint32_t val[2], struct mlx4_context *ctx, int offset) +{ + *(volatile uint64_t *) (ctx->uar + offset) = MLX4_PAIR_TO_64(val); +} + +#else /* SIZEOF_LONG != 8: two 32-bit stores, kept together by ctx->uar_lock */ + +static inline void mlx4_write64(uint32_t val[2], struct mlx4_context *ctx, int offset) +{ + pthread_spin_lock(&ctx->uar_lock); + *(volatile uint32_t *) (ctx->uar + offset) = val[0]; + *(volatile uint32_t *) (ctx->uar + offset + 4) = val[1]; + pthread_spin_unlock(&ctx->uar_lock); +} + +#endif /* SIZEOF_LONG == 8 */ + +#endif /* DOORBELL_H */ diff --git a/src/mlx4-abi.h b/src/mlx4-abi.h new file mode 100644 index 0000000..2a392cb --- /dev/null +++ b/src/mlx4-abi.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2007 Cisco, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX4_ABI_H +#define MLX4_ABI_H + +#include <infiniband/kern-abi.h> + +#define MLX4_UVERBS_ABI_VERSION 1 /* newest ABI version accepted by the check in mlx4_driver_init() */ +/* Each struct below wraps a generic libibverbs command or response and appends the mlx4-specific fields. */ +struct mlx4_alloc_ucontext_resp { + struct ibv_get_context_resp ibv_resp; + __u32 qp_tab_size; + __u32 bf_reg_size; +}; + +struct mlx4_alloc_pd_resp { + struct ibv_alloc_pd_resp ibv_resp; + __u32 pdn; + __u32 reserved; +}; + +struct mlx4_create_cq { + struct ibv_create_cq ibv_cmd; + __u64 buf_addr; + __u64 db_addr; +}; + +struct mlx4_create_cq_resp { + struct ibv_create_cq_resp ibv_resp; + __u32 cqn; + __u32 reserved; +}; + +struct mlx4_resize_cq { + struct ibv_resize_cq ibv_cmd; + __u64 buf_addr; +}; + +struct mlx4_create_srq { + struct ibv_create_srq ibv_cmd; + __u64 buf_addr; + __u64 db_addr; +}; + +struct mlx4_create_srq_resp { + struct ibv_create_srq_resp ibv_resp; + __u32 srqn; + __u32 reserved; +}; + +struct mlx4_create_qp { + struct ibv_create_qp ibv_cmd; + __u64 buf_addr; + __u64 db_addr; +}; + +#endif /* MLX4_ABI_H */ diff --git a/src/mlx4.c b/src/mlx4.c new file mode 100644 index 0000000..95f30d0 --- /dev/null +++ b/src/mlx4.c @@ -0,0 +1,262 @@ +/* + * Copyright (c) 2007 Cisco, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#if HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <sys/mman.h> +#include <pthread.h> +#include <string.h> + +#ifndef HAVE_IBV_REGISTER_DRIVER +#include <sysfs/libsysfs.h> +#endif + +#ifndef HAVE_IBV_READ_SYSFS_FILE +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#endif + +#include "mlx4.h" +#include "mlx4-abi.h" + +#ifndef PCI_VENDOR_ID_MELLANOX +#define PCI_VENDOR_ID_MELLANOX 0x15b3 +#endif + +#ifndef PCI_DEVICE_ID_MELLANOX_HERMON +#define PCI_DEVICE_ID_MELLANOX_HERMON 0x6340 +#endif + +#define HCA(v, d) \ + { .vendor = PCI_VENDOR_ID_##v, \ + .device = PCI_DEVICE_ID_MELLANOX_##d } +/* PCI vendor/device pairs this driver claims; file-local, only mlx4_driver_init() scans it. */ +static struct { + unsigned vendor; + unsigned device; +} hca_table[] = { + HCA(MELLANOX, HERMON), +}; + +static struct ibv_context_ops mlx4_ctx_ops = { + .query_device = mlx4_query_device, + .query_port = mlx4_query_port, + .alloc_pd = mlx4_alloc_pd, + .dealloc_pd = mlx4_free_pd, + .reg_mr = mlx4_reg_mr, + .dereg_mr = mlx4_dereg_mr, + .create_cq = mlx4_create_cq, + .poll_cq = mlx4_poll_cq, + .req_notify_cq = mlx4_arm_cq, + .cq_event = mlx4_cq_event, + .resize_cq = mlx4_resize_cq, + .destroy_cq = mlx4_destroy_cq, + .create_srq = mlx4_create_srq, + .modify_srq = mlx4_modify_srq, + .query_srq = mlx4_query_srq, + .destroy_srq = mlx4_destroy_srq, + .post_srq_recv = mlx4_post_srq_recv, + .create_qp = mlx4_create_qp, + .query_qp = mlx4_query_qp, + .modify_qp = mlx4_modify_qp, + .destroy_qp = mlx4_destroy_qp, + .post_send = mlx4_post_send, + .post_recv = mlx4_post_recv, + .create_ah = mlx4_create_ah, + .destroy_ah = mlx4_destroy_ah, + .attach_mcast = mlx4_attach_mcast, + .detach_mcast = mlx4_detach_mcast +}; +/* Build a device context: issue ibv_cmd_get_context(), initialise the QP table and doorbell lists, and mmap() the UAR page from cmd_fd; returns NULL on failure. */ +static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_fd) +{ + struct mlx4_context *context; + struct ibv_get_context cmd; + struct mlx4_alloc_ucontext_resp resp; + int i; + + context = malloc(sizeof *context); + if (!context) + return NULL; + + context->ibv_ctx.cmd_fd = cmd_fd; + + if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof cmd, + &resp.ibv_resp, sizeof 
resp)) + goto err_free; + + context->num_qps = resp.qp_tab_size; + context->qp_table_shift = ffs(context->num_qps) - 1 - MLX4_QP_TABLE_BITS; + context->qp_table_mask = (1 << context->qp_table_shift) - 1; + + pthread_mutex_init(&context->qp_table_mutex, NULL); + for (i = 0; i < MLX4_QP_TABLE_SIZE; ++i) + context->qp_table[i].refcnt = 0; + + for (i = 0; i < MLX4_NUM_DB_TYPE; ++i) + context->db_list[i] = NULL; + + pthread_mutex_init(&context->db_list_mutex, NULL); + + context->uar = mmap(NULL, to_mdev(ibdev)->page_size, PROT_WRITE, + MAP_SHARED, cmd_fd, 0); + if (context->uar == MAP_FAILED) + goto err_free; + + pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE); + + context->ibv_ctx.ops = mlx4_ctx_ops; + + return &context->ibv_ctx; + +err_free: + free(context); + return NULL; +} +/* Tear down a context created by mlx4_alloc_context(): unmap the UAR page and free the structure. */ +static void mlx4_free_context(struct ibv_context *ibctx) +{ + struct mlx4_context *context = to_mctx(ibctx); + + munmap(context->uar, to_mdev(ibctx->device)->page_size); + free(context); +} + +static struct ibv_device_ops mlx4_dev_ops = { + .alloc_context = mlx4_alloc_context, + .free_context = mlx4_free_context +}; + +/* + * Keep a private implementation of ibv_read_sysfs_file() to handle + * old versions of libibverbs that didn't implement it. This can be + * removed when libibverbs 1.0.3 or newer is available "everywhere." 
+ */ +#ifndef HAVE_IBV_READ_SYSFS_FILE +static int ibv_read_sysfs_file(const char *dir, const char *file, + char *buf, size_t size) +{ + char path[256]; + int fd; + int len; + + snprintf(path, sizeof path, "%s/%s", dir, file); + + fd = open(path, O_RDONLY); + if (fd < 0) + return -1; + + len = read(fd, buf, size); + + close(fd); + + if (len > 0 && buf[len - 1] == '\n') + buf[--len] = '\0'; /* strip a single trailing newline */ + + return len; +} +#endif /* HAVE_IBV_READ_SYSFS_FILE */ +/* Driver probe: match the PCI vendor/device IDs read from sysfs against hca_table and, if the ABI version is supported, return a newly allocated device; NULL otherwise. */ +static struct ibv_device *mlx4_driver_init(const char *uverbs_sys_path, + int abi_version) +{ + char value[8] = ""; /* zero-filled; reads are capped at sizeof value - 1 so the last byte always terminates the string */ + struct mlx4_device *dev; + unsigned vendor, device; + int i; + + if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor", + value, sizeof value - 1) < 0 || + sscanf(value, "%i", &vendor) != 1) /* an unparsable attribute must not leave vendor uninitialised */ + return NULL; + + if (ibv_read_sysfs_file(uverbs_sys_path, "device/device", + value, sizeof value - 1) < 0 || + sscanf(value, "%i", &device) != 1) + return NULL; + + for (i = 0; i < sizeof hca_table / sizeof hca_table[0]; ++i) + if (vendor == hca_table[i].vendor && + device == hca_table[i].device) + goto found; + + return NULL; + +found: + if (abi_version > MLX4_UVERBS_ABI_VERSION) { + fprintf(stderr, PFX "Fatal: ABI version %d of %s is too new (expected %d)\n", + abi_version, uverbs_sys_path, MLX4_UVERBS_ABI_VERSION); + return NULL; + } + + dev = malloc(sizeof *dev); + if (!dev) { + fprintf(stderr, PFX "Fatal: couldn't allocate device for %s\n", + uverbs_sys_path); + return NULL; + } + + dev->ibv_dev.ops = mlx4_dev_ops; + dev->page_size = sysconf(_SC_PAGESIZE); + + return &dev->ibv_dev; +} + +#ifdef HAVE_IBV_REGISTER_DRIVER +static __attribute__((constructor)) void mlx4_register_driver(void) +{ + ibv_register_driver("mlx4", mlx4_driver_init); +} +#else +/* + * Export the old libsysfs sysfs_class_device-based driver entry point + * if libibverbs does not export an ibv_register_driver() function. 
+ */ +struct ibv_device *openib_driver_init(struct sysfs_class_device *sysdev) +{ + int abi_ver = 0; /* stays 0 when the abi_version attribute cannot be read */ + char value[8]; /* NOTE(review): only NUL-terminated when the attribute ends in a newline -- confirm before relying on strtol() here */ + + if (ibv_read_sysfs_file(sysdev->path, "abi_version", + value, sizeof value) > 0) + abi_ver = strtol(value, NULL, 10); + + return mlx4_driver_init(sysdev->path, abi_ver); +} +#endif /* HAVE_IBV_REGISTER_DRIVER */ diff --git a/src/mlx4.h b/src/mlx4.h new file mode 100644 index 0000000..8b4dc20 --- /dev/null +++ b/src/mlx4.h @@ -0,0 +1,338 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#ifndef MLX4_H
+#define MLX4_H
+
+#include
+
+#include
+#include
+
+#ifdef HAVE_VALGRIND_MEMCHECK_H
+
+# include
+
+# if !defined(VALGRIND_MAKE_MEM_DEFINED) || !defined(VALGRIND_MAKE_MEM_UNDEFINED)
+# warning "Valgrind support requested, but VALGRIND_MAKE_MEM_(UN)DEFINED not available"
+# endif
+
+#endif /* HAVE_VALGRIND_MEMCHECK_H */
+
+/* Expand the Valgrind annotations to nothing when they are not defined. */
+#ifndef VALGRIND_MAKE_MEM_DEFINED
+# define VALGRIND_MAKE_MEM_DEFINED(addr,len)
+#endif
+
+#ifndef VALGRIND_MAKE_MEM_UNDEFINED
+# define VALGRIND_MAKE_MEM_UNDEFINED(addr,len)
+#endif
+
+/* Fall back to the full barrier mb() when no read/write barrier is defined. */
+#ifndef rmb
+# define rmb() mb()
+#endif
+
+#ifndef wmb
+# define wmb() mb()
+#endif
+
+#define HIDDEN __attribute__((visibility ("hidden")))
+
+/* Prefix for messages printed by this driver. */
+#define PFX "mlx4: "
+
+/* Bytes per CQ entry; mlx4_create_cq() allocates cqe * MLX4_CQ_ENTRY_SIZE. */
+enum {
+	MLX4_CQ_ENTRY_SIZE = 0x20
+};
+
+/* Added to a non-zero ibv static_rate to form mlx4_av.stat_rate. */
+enum {
+	MLX4_STAT_RATE_OFFSET = 5
+};
+
+/* Number of first-level slots in mlx4_context.qp_table. */
+enum {
+	MLX4_QP_TABLE_BITS = 8,
+	MLX4_QP_TABLE_SIZE = 1 << MLX4_QP_TABLE_BITS,
+	MLX4_QP_TABLE_MASK = MLX4_QP_TABLE_SIZE - 1
+};
+
+/* Doorbell record kinds; each kind has its own list in mlx4_context.db_list. */
+enum mlx4_db_type {
+	MLX4_DB_TYPE_CQ,
+	MLX4_DB_TYPE_RQ,
+	MLX4_NUM_DB_TYPE
+};
+
+/*
+ * Opcode values.  The MLX4_OPCODE_* values are what mlx4_post_send()
+ * writes into a send WQE's owner_opcode field (see mlx4_ib_opcode[]).
+ */
+enum {
+	MLX4_OPCODE_NOP = 0x00,
+	MLX4_OPCODE_SEND_INVAL = 0x01,
+	MLX4_OPCODE_RDMA_WRITE = 0x08,
+	MLX4_OPCODE_RDMA_WRITE_IMM = 0x09,
+	MLX4_OPCODE_SEND = 0x0a,
+	MLX4_OPCODE_SEND_IMM = 0x0b,
+	MLX4_OPCODE_LSO = 0x0e,
+	MLX4_OPCODE_RDMA_READ = 0x10,
+	MLX4_OPCODE_ATOMIC_CS = 0x11,
+	MLX4_OPCODE_ATOMIC_FA = 0x12,
+	MLX4_OPCODE_ATOMIC_MASK_CS = 0x14,
+	MLX4_OPCODE_ATOMIC_MASK_FA = 0x15,
+	MLX4_OPCODE_BIND_MW = 0x18,
+	MLX4_OPCODE_FMR = 0x19,
+	MLX4_OPCODE_LOCAL_INVAL = 0x1b,
+	MLX4_OPCODE_CONFIG_CMD = 0x1f,
+
+	MLX4_RECV_OPCODE_RDMA_WRITE_IMM = 0x00,
+	MLX4_RECV_OPCODE_SEND = 0x01,
+	MLX4_RECV_OPCODE_SEND_IMM = 0x02,
+	MLX4_RECV_OPCODE_SEND_INVAL = 0x03,
+
+	MLX4_CQE_OPCODE_ERROR = 0x1e,
+	MLX4_CQE_OPCODE_RESIZE = 0x16,
+};
+
+/* Driver-private device; page_size is filled from sysconf(_SC_PAGESIZE). */
+struct mlx4_device {
+	struct ibv_device ibv_dev;
+	int page_size;
+};
+
+struct mlx4_db_page;
+
+/* Driver-private context wrapping the generic ibv_context. */
+struct mlx4_context {
+	struct ibv_context ibv_ctx;
+
+	/* Base of the doorbell region; the send doorbell is written at an offset from it */
+	void *uar;
+	pthread_spinlock_t uar_lock;
+
+	/*
+	 * Two-level QP number -> mlx4_qp lookup table.  A second-level
+	 * table exists only while its refcnt is non-zero.
+	 */
+	struct {
+		struct mlx4_qp **table;
+		int refcnt;
+	} qp_table[MLX4_QP_TABLE_SIZE];
+	/* Taken by mlx4_store_qp() and mlx4_clear_qp() */
+	pthread_mutex_t qp_table_mutex;
+	int num_qps;
+	int qp_table_shift;
+	int qp_table_mask;
+
+	struct mlx4_db_page *db_list[MLX4_NUM_DB_TYPE];
+	pthread_mutex_t db_list_mutex;
+};
+
+/* A buffer obtained from mlx4_alloc_buf() and released with mlx4_free_buf(). */
+struct mlx4_buf {
+	void *buf;
+	size_t length;
+};
+
+/* Protection domain; pdn is the number returned by the kernel on allocation. */
+struct mlx4_pd {
+	struct ibv_pd ibv_pd;
+	uint32_t pdn;
+};
+
+/* Completion queue. */
+struct mlx4_cq {
+	struct ibv_cq ibv_cq;
+	struct mlx4_buf buf;
+	pthread_spinlock_t lock;
+	uint32_t cqn;
+	uint32_t cons_index;
+	/* Doorbell record from mlx4_alloc_db(); arm_db is the word right after it */
+	uint32_t *set_ci_db;
+	uint32_t *arm_db;
+	int arm_sn;
+};
+
+/* Shared receive queue. */
+struct mlx4_srq {
+	struct ibv_srq ibv_srq;
+	struct mlx4_buf buf;
+	pthread_spinlock_t lock;
+	/* wr_id of each posted WQE, indexed by WQE number */
+	uint64_t *wrid;
+	uint32_t srqn;
+	/* Number of WQEs in buf */
+	int max;
+	int max_gs;
+	/* Each WQE occupies 1 << wqe_shift bytes */
+	int wqe_shift;
+	/* First and last WQE of the free list linked through next_wqe_index */
+	int head;
+	int tail;
+	uint32_t *db;
+	/* Running count of posted receives, written to *db */
+	uint16_t counter;
+};
+
+/* One work queue (send or receive) of a QP. */
+struct mlx4_wq {
+	/* wr_id of each posted WQE, indexed by WQE number */
+	uint64_t *wrid;
+	pthread_spinlock_t lock;
+	int max;
+	/* Free-running counters; head - tail is the number of outstanding WQEs */
+	unsigned head;
+	unsigned tail;
+	int max_gs;
+	/* Each WQE occupies 1 << wqe_shift bytes */
+	int wqe_shift;
+	/* Byte offset of this queue inside mlx4_qp.buf */
+	int offset;
+};
+
+/* Queue pair: one buffer holding both work queues. */
+struct mlx4_qp {
+	struct ibv_qp ibv_qp;
+	struct mlx4_buf buf;
+	int max_inline_data;
+	int buf_size;
+
+	/* Value written to the send doorbell: htonl(qp_num << 8) */
+	uint32_t doorbell_qpn;
+	/* ORed into srcrb_flags of every send WQE */
+	uint32_t sq_signal_bits;
+	struct mlx4_wq sq;
+
+	/* Receive doorbell record */
+	uint32_t *db;
+	struct mlx4_wq rq;
+};
+
+/*
+ * Address vector as filled in by mlx4_create_ah() and copied into UD
+ * send WQEs; multi-byte fields are stored in network byte order.
+ */
+struct mlx4_av {
+	uint32_t port_pd;
+	uint8_t reserved1;
+	uint8_t g_slid;
+	uint16_t dlid;
+	uint8_t reserved2;
+	uint8_t gid_index;
+	uint8_t stat_rate;
+	uint8_t hop_limit;
+	uint32_t sl_tclass_flowlabel;
+	uint8_t dgid[16];
+};
+
+/* Address handle. */
+struct mlx4_ah {
+	struct ibv_ah ibv_ah;
+	struct mlx4_av av;
+};
+
+/* Round val up to a multiple of align; the mask arithmetic assumes a power of two. */
+static inline unsigned long align(unsigned long val, unsigned long align)
+{
+	return (val + align - 1) & ~(align - 1);
+}
+
+/*
+ * Convert a pointer to the embedded ibv_<xxx> member (held in a local
+ * variable named ib<xxx>) into a pointer to the enclosing
+ * struct mlx4_<type>.
+ */
+#define to_mxxx(xxx, type) \
+	((struct mlx4_##type *) \
+	 ((void *) ib##xxx - offsetof(struct mlx4_##type, ibv_##xxx)))
+
+static inline struct mlx4_device *to_mdev(struct ibv_device *ibdev)
+{
+	return to_mxxx(dev, device);
+}
+
+static inline struct mlx4_context *to_mctx(struct ibv_context *ibctx)
+{
+	return to_mxxx(ctx, context);
+}
+
+static inline struct mlx4_pd *to_mpd(struct ibv_pd *ibpd)
+{
+	return to_mxxx(pd, pd);
+}
+
+static inline struct mlx4_cq *to_mcq(struct ibv_cq *ibcq)
+{
+	return to_mxxx(cq, cq);
+}
+
+static inline struct mlx4_srq *to_msrq(struct ibv_srq *ibsrq)
+{
+	return to_mxxx(srq, srq);
+}
+
+static inline struct mlx4_qp *to_mqp(struct ibv_qp *ibqp)
+{
+	return to_mxxx(qp, qp);
+}
+
+static inline struct mlx4_ah *to_mah(struct ibv_ah *ibah)
+{
+	return to_mxxx(ah, ah);
+}
+
+int mlx4_alloc_buf(struct mlx4_buf *buf, size_t size, int page_size);
+void mlx4_free_buf(struct mlx4_buf *buf);
+
+uint32_t *mlx4_alloc_db(struct mlx4_context *context, enum mlx4_db_type type);
+void mlx4_free_db(struct mlx4_context *context, uint32_t *db);
+
+int mlx4_query_device(struct ibv_context *context,
+		      struct ibv_device_attr *attr);
+int mlx4_query_port(struct ibv_context *context, uint8_t port,
+		    struct ibv_port_attr *attr);
+
+struct ibv_pd *mlx4_alloc_pd(struct ibv_context *context);
+int mlx4_free_pd(struct ibv_pd *pd);
+
+struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr,
+			   size_t length, enum ibv_access_flags access);
+int mlx4_dereg_mr(struct ibv_mr *mr);
+
+struct ibv_cq *mlx4_create_cq(struct ibv_context *context, int cqe,
+			      struct ibv_comp_channel *channel,
+			      int comp_vector);
+int mlx4_resize_cq(struct ibv_cq *cq, int cqe);
+int mlx4_destroy_cq(struct ibv_cq *cq);
+int mlx4_poll_cq(struct ibv_cq *cq, int ne, struct ibv_wc *wc);
+int mlx4_arm_cq(struct ibv_cq *cq, int solicited);
+void mlx4_cq_event(struct ibv_cq *cq);
+void mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn,
+		   struct mlx4_srq *srq);
+void mlx4_cq_resize_copy_cqes(struct mlx4_cq *cq, void *buf, int new_cqe);
+
+struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd,
+				struct ibv_srq_init_attr *attr);
+int mlx4_modify_srq(struct ibv_srq *srq,
+		    struct ibv_srq_attr *attr,
+		    enum ibv_srq_attr_mask mask);
+int mlx4_query_srq(struct ibv_srq *srq,
+		   struct ibv_srq_attr *attr);
+int mlx4_destroy_srq(struct ibv_srq *srq);
+int mlx4_alloc_srq_buf(struct ibv_pd *pd, struct ibv_srq_attr *attr,
+		       struct mlx4_srq *srq);
+void mlx4_free_srq_wqe(struct mlx4_srq *srq, int ind);
+int mlx4_post_srq_recv(struct ibv_srq *ibsrq,
+		       struct ibv_recv_wr *wr,
+		       struct ibv_recv_wr **bad_wr);
+
+struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr);
+int mlx4_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+		  enum ibv_qp_attr_mask attr_mask,
+		  struct ibv_qp_init_attr *init_attr);
+int mlx4_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+		   enum ibv_qp_attr_mask attr_mask);
+int mlx4_destroy_qp(struct ibv_qp *qp);
+void mlx4_init_qp_indices(struct mlx4_qp *qp);
+int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
+		   struct ibv_send_wr **bad_wr);
+int mlx4_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
+		   struct ibv_recv_wr **bad_wr);
+int mlx4_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
+		      enum ibv_qp_type type, struct mlx4_qp *qp);
+struct mlx4_qp *mlx4_find_qp(struct mlx4_context *ctx, uint32_t qpn);
+int mlx4_store_qp(struct mlx4_context *ctx, uint32_t qpn, struct mlx4_qp *qp);
+void mlx4_clear_qp(struct mlx4_context *ctx, uint32_t qpn);
+struct ibv_ah *mlx4_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr);
+int mlx4_destroy_ah(struct ibv_ah *ah);
+int mlx4_alloc_av(struct mlx4_pd *pd, struct ibv_ah_attr *attr,
+		  struct mlx4_ah *ah);
+void mlx4_free_av(struct mlx4_ah *ah);
+int mlx4_attach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid);
+int mlx4_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid);
+
+#endif /* MLX4_H */
diff --git a/src/mlx4.map b/src/mlx4.map
new file mode 100644
index 0000000..59a8bae
--- /dev/null
+++ b/src/mlx4.map
@@ -0,0 +1,6 @@
+{
+	global:
+		ibv_driver_init;
+		openib_driver_init;
+	local: *;
+};
diff --git a/src/qp.c b/src/qp.c
new file mode 100644
index 0000000..36a18f0
--- /dev/null
+++ b/src/qp.c
@@ -0,0 +1,442 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies Ltd.
All rights reserved. + * Copyright (c) 2007 Cisco, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#if HAVE_CONFIG_H
+# include
+#endif /* HAVE_CONFIG_H */
+
+#include
+#include
+#include
+#include
+
+#include "mlx4.h"
+#include "doorbell.h"
+#include "wqe.h"
+
+/* Maps an ibv_wr_opcode to the opcode value written into a send WQE. */
+static const uint32_t mlx4_ib_opcode[] = {
+	[IBV_WR_SEND] = MLX4_OPCODE_SEND,
+	[IBV_WR_SEND_WITH_IMM] = MLX4_OPCODE_SEND_IMM,
+	[IBV_WR_RDMA_WRITE] = MLX4_OPCODE_RDMA_WRITE,
+	[IBV_WR_RDMA_WRITE_WITH_IMM] = MLX4_OPCODE_RDMA_WRITE_IMM,
+	[IBV_WR_RDMA_READ] = MLX4_OPCODE_RDMA_READ,
+	[IBV_WR_ATOMIC_CMP_AND_SWP] = MLX4_OPCODE_ATOMIC_CS,
+	[IBV_WR_ATOMIC_FETCH_AND_ADD] = MLX4_OPCODE_ATOMIC_FA,
+};
+
+/* Return the address of receive WQE number n inside the QP buffer. */
+static void *get_recv_wqe(struct mlx4_qp *qp, int n)
+{
+	return qp->buf.buf + qp->rq.offset + (n << qp->rq.wqe_shift);
+}
+
+/* Return the address of send WQE number n inside the QP buffer. */
+static void *get_send_wqe(struct mlx4_qp *qp, int n)
+{
+	return qp->buf.buf + qp->sq.offset + (n << qp->sq.wqe_shift);
+}
+
+/* Reset the head and tail counters of both work queues to zero. */
+void mlx4_init_qp_indices(struct mlx4_qp *qp)
+{
+	qp->sq.head = 0;
+	qp->sq.tail = 0;
+	qp->rq.head = 0;
+	qp->rq.tail = 0;
+}
+
+/*
+ * Return non-zero if posting nreq more requests would reach wq->max
+ * outstanding entries.  The counters are first compared without any
+ * lock; only when the queue looks full are they read again while
+ * holding cq->lock.
+ * NOTE(review): presumably wq->tail is advanced by the CQ polling code
+ * under that lock -- confirm against cq.c.
+ */
+static inline int wq_overflow(struct mlx4_wq *wq, int nreq, struct mlx4_cq *cq)
+{
+	unsigned cur;
+
+	cur = wq->head - wq->tail;
+	if (cur + nreq < wq->max)
+		return 0;
+
+	pthread_spin_lock(&cq->lock);
+	cur = wq->head - wq->tail;
+	pthread_spin_unlock(&cq->lock);
+
+	return cur + nreq >= wq->max;
+}
+
+/*
+ * Build one send WQE for each work request in the list wr and ring the
+ * send doorbell.
+ *
+ * Returns 0 on success.  On failure returns -1 and stores the first
+ * request that could not be posted in *bad_wr; requests before it have
+ * been posted and the doorbell is still rung for them.  A request is
+ * rejected when the send queue is full, when it has more than
+ * sq.max_gs gather entries, or when its opcode has no entry in
+ * mlx4_ib_opcode[].
+ *
+ * Not implemented yet (marked XXX below): the extra segments for RDMA
+ * and atomic operations, and inline sends.
+ */
+int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
+		   struct ibv_send_wr **bad_wr)
+{
+	struct mlx4_qp *qp = to_mqp(ibqp);
+	void *wqe;
+	struct mlx4_wqe_ctrl_seg *ctrl;
+	int ind;
+	int nreq;
+	int ret = 0;
+	int size;
+	int i;
+
+	pthread_spin_lock(&qp->sq.lock);
+
+	/* XXX check that state is OK to post send */
+
+	/* ind is a free-running counter; it is masked wherever it indexes the queue */
+	ind = qp->sq.head;
+
+	for (nreq = 0; wr; ++nreq, wr = wr->next) {
+		if (wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {
+			ret = -1;
+			*bad_wr = wr;
+			goto out;
+		}
+
+		if (wr->num_sge > qp->sq.max_gs) {
+			ret = -1;
+			*bad_wr = wr;
+			goto out;
+		}
+
+		if (wr->opcode >= sizeof mlx4_ib_opcode / sizeof mlx4_ib_opcode[0]) {
+			ret = -1;
+			*bad_wr = wr;
+			goto out;
+		}
+
+		ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.max - 1));
+		qp->sq.wrid[ind & (qp->sq.max - 1)] = wr->wr_id;
+
+		ctrl->srcrb_flags =
+			(wr->send_flags & IBV_SEND_SIGNALED ?
+			 htonl(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |
+			(wr->send_flags & IBV_SEND_SOLICITED ?
+			 htonl(MLX4_WQE_CTRL_SOLICIT) : 0) |
+			qp->sq_signal_bits;
+
+		if (wr->opcode == IBV_WR_SEND_WITH_IMM ||
+		    wr->opcode == IBV_WR_RDMA_WRITE_WITH_IMM)
+			ctrl->imm = wr->imm_data;
+		else
+			ctrl->imm = 0;
+
+		/* size counts the WQE length in 16-byte units */
+		wqe += sizeof *ctrl;
+		size = sizeof *ctrl / 16;
+
+		switch (ibqp->qp_type) {
+		case IBV_QPT_RC:
+		case IBV_QPT_UC:
+			switch (wr->opcode) {
+			case IBV_WR_ATOMIC_CMP_AND_SWP:
+			case IBV_WR_ATOMIC_FETCH_AND_ADD:
+				/*XXX*/
+				break;
+
+			case IBV_WR_RDMA_WRITE:
+			case IBV_WR_RDMA_WRITE_WITH_IMM:
+			case IBV_WR_RDMA_READ:
+				/*XXX*/
+				break;
+
+			default:
+				/* No extra segments required for sends */
+				break;
+			}
+			break;
+
+		case IBV_QPT_UD:
+			/* Datagram segment: address vector, destination QPN and Q_Key */
+			memcpy(((struct mlx4_wqe_datagram_seg *) wqe)->av,
+			       &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
+			((struct mlx4_wqe_datagram_seg *) wqe)->dqpn =
+				htonl(wr->wr.ud.remote_qpn);
+			((struct mlx4_wqe_datagram_seg *) wqe)->qkey =
+				htonl(wr->wr.ud.remote_qkey);
+
+			wqe += sizeof (struct mlx4_wqe_datagram_seg);
+			size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
+			break;
+
+		default:
+			break;
+		}
+
+		if (wr->send_flags & IBV_SEND_INLINE) {
+			/*XXX handle inline send */
+		} else {
+			struct mlx4_wqe_data_seg *seg = wqe;
+
+			for (i = 0; i < wr->num_sge; ++i) {
+				seg[i].byte_count = htonl(wr->sg_list[i].length);
+				seg[i].lkey = htonl(wr->sg_list[i].lkey);
+				seg[i].addr = htonll(wr->sg_list[i].addr);
+			}
+
+			size += wr->num_sge * (sizeof *seg / 16);
+		}
+
+		ctrl->fence_size = (wr->send_flags & IBV_SEND_FENCE ?
+				    MLX4_WQE_CTRL_FENCE : 0) | size;
+
+		/*
+		 * Make sure descriptor is fully written before
+		 * setting ownership bit (because HW can start
+		 * executing as soon as we do).
+		 */
+		wmb();
+
+		/* Bit 31 is set whenever the (ind & sq.max) bit of the counter is set */
+		ctrl->owner_opcode = htonl(mlx4_ib_opcode[wr->opcode]) |
+			(ind & qp->sq.max ? htonl(1 << 31) : 0);
+
+		++ind;
+	}
+
+out:
+	if (nreq) {
+		qp->sq.head += nreq;
+
+		/*
+		 * Make sure that descriptors are written before
+		 * doorbell record.
+		 */
+		wmb();
+
+		*(uint32_t *) (to_mctx(ibqp->context)->uar + MLX4_SEND_DOORBELL) =
+			qp->doorbell_qpn;
+	}
+
+	pthread_spin_unlock(&qp->sq.lock);
+
+	return ret;
+}
+
+/*
+ * Write one receive WQE for each work request in the list wr and
+ * update the receive doorbell record.
+ *
+ * Returns 0 on success.  On failure returns -1 and stores the first
+ * request that could not be posted in *bad_wr; requests before it have
+ * been posted.  A request is rejected when the receive queue is full
+ * or when it has more than rq.max_gs scatter entries.
+ */
+int mlx4_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
+		   struct ibv_recv_wr **bad_wr)
+{
+	struct mlx4_qp *qp = to_mqp(ibqp);
+	struct mlx4_wqe_data_seg *scat;
+	int ret = 0;
+	int nreq;
+	int ind;
+	int i;
+
+	pthread_spin_lock(&qp->rq.lock);
+
+	/* XXX check that state is OK to post receive */
+
+	ind = qp->rq.head & (qp->rq.max - 1);
+
+	for (nreq = 0; wr; ++nreq, wr = wr->next) {
+		if (wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) {
+			ret = -1;
+			*bad_wr = wr;
+			goto out;
+		}
+
+		if (wr->num_sge > qp->rq.max_gs) {
+			ret = -1;
+			*bad_wr = wr;
+			goto out;
+		}
+
+		scat = get_recv_wqe(qp, ind);
+
+		for (i = 0; i < wr->num_sge; ++i) {
+			scat[i].byte_count = htonl(wr->sg_list[i].length);
+			scat[i].lkey = htonl(wr->sg_list[i].lkey);
+			scat[i].addr = htonll(wr->sg_list[i].addr);
+		}
+
+		/* Terminate a short scatter list with a zero-length, invalid-lkey entry */
+		if (i < qp->rq.max_gs) {
+			scat[i].byte_count = 0;
+			scat[i].lkey = htonl(MLX4_INVALID_LKEY);
+			scat[i].addr = 0;
+		}
+
+		qp->rq.wrid[ind] = wr->wr_id;
+
+		ind = (ind + 1) & (qp->rq.max - 1);
+	}
+
+out:
+	if (nreq) {
+		qp->rq.head += nreq;
+
+		/*
+		 * Make sure that descriptors are written before
+		 * doorbell record.
+		 */
+		wmb();
+
+		*qp->db = htonl(qp->rq.head & 0xffff);
+	}
+
+	pthread_spin_unlock(&qp->rq.lock);
+
+	return ret;
+}
+
+/*
+ * Allocate the wrid arrays and the WQE buffer of a QP.
+ *
+ * qp->sq.max and qp->rq.max must already be set by the caller.  Sets
+ * max_gs, wqe_shift and offset of both work queues and qp->buf_size,
+ * zeroes the buffer and writes htonl(1 << 31) into owner_opcode of
+ * every send WQE.  Returns 0 on success and -1 if an allocation fails,
+ * in which case nothing is left allocated.
+ */
+int mlx4_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
+		      enum ibv_qp_type type, struct mlx4_qp *qp)
+{
+	struct mlx4_wqe_ctrl_seg *ctrl;
+	int size;
+	int max_sq_sge;
+	int i;
+
+	qp->rq.max_gs = cap->max_recv_sge;
+	qp->sq.max_gs = cap->max_send_sge;
+	/* Data segments needed to hold max_inline_data, but at least max_send_sge */
+	max_sq_sge = align(cap->max_inline_data + sizeof (struct mlx4_wqe_inline_seg),
+			   sizeof (struct mlx4_wqe_data_seg)) / sizeof (struct mlx4_wqe_data_seg);
+	if (max_sq_sge < cap->max_send_sge)
+		max_sq_sge = cap->max_send_sge;
+
+	qp->sq.wrid = malloc(qp->sq.max * sizeof (uint64_t));
+	if (!qp->sq.wrid)
+		return -1;
+
+	qp->rq.wrid = malloc(qp->rq.max * sizeof (uint64_t));
+	if (!qp->rq.wrid) {
+		free(qp->sq.wrid);
+		return -1;
+	}
+
+	size = qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg);
+
+	/* Receive WQE stride: smallest power of two >= size, at least 16 bytes */
+	for (qp->rq.wqe_shift = 4; 1 << qp->rq.wqe_shift < size;
+	     qp->rq.wqe_shift++)
+		; /* nothing */
+
+	size = max_sq_sge * sizeof (struct mlx4_wqe_data_seg);
+	switch (type) {
+	case IBV_QPT_UD:
+		size += sizeof (struct mlx4_wqe_datagram_seg);
+		break;
+
+	case IBV_QPT_UC:
+		size += sizeof (struct mlx4_raddr_seg);
+		break;
+
+	case IBV_QPT_RC:
+		size += sizeof (struct mlx4_raddr_seg);
+		/*
+		 * An atomic op will require an atomic segment, a
+		 * remote address segment and one scatter entry.
+		 */
+		if (size < (sizeof (struct mlx4_atomic_seg) +
+			    sizeof (struct mlx4_raddr_seg) +
+			    sizeof (struct mlx4_wqe_data_seg)))
+			size = (sizeof (struct mlx4_atomic_seg) +
+				sizeof (struct mlx4_raddr_seg) +
+				sizeof (struct mlx4_wqe_data_seg));
+		break;
+
+	default:
+		break;
+	}
+
+	/* Make sure that we have enough space for a bind request */
+	if (size < sizeof (struct mlx4_bind_seg))
+		size = sizeof (struct mlx4_bind_seg);
+
+	size += sizeof (struct mlx4_wqe_ctrl_seg);
+
+	/* Send WQE stride: smallest power of two >= size, at least 64 bytes */
+	for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;
+	     qp->sq.wqe_shift++)
+		; /* nothing */
+
+	/* The queue with the larger WQE stride is placed first in the buffer */
+	qp->buf_size = (qp->rq.max << qp->rq.wqe_shift) +
+		(qp->sq.max << qp->sq.wqe_shift);
+	if (qp->rq.wqe_shift > qp->sq.wqe_shift) {
+		qp->rq.offset = 0;
+		qp->sq.offset = qp->rq.max << qp->rq.wqe_shift;
+	} else {
+		qp->rq.offset = qp->sq.max << qp->sq.wqe_shift;
+		qp->sq.offset = 0;
+	}
+
+	if (mlx4_alloc_buf(&qp->buf,
+			   align(qp->buf_size, to_mdev(pd->context->device)->page_size),
+			   to_mdev(pd->context->device)->page_size)) {
+		free(qp->sq.wrid);
+		free(qp->rq.wrid);
+		return -1;
+	}
+
+	memset(qp->buf.buf, 0, qp->buf_size);
+
+	for (i = 0; i < qp->sq.max; ++i) {
+		ctrl = get_send_wqe(qp, i);
+		ctrl->owner_opcode = htonl(1 << 31);
+	}
+
+	return 0;
+}
+
+/*
+ * Look up the QP registered for qpn.  Returns NULL when the
+ * second-level table covering qpn has no users.  Takes no lock.
+ */
+struct mlx4_qp *mlx4_find_qp(struct mlx4_context *ctx, uint32_t qpn)
+{
+	int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
+
+	if (ctx->qp_table[tind].refcnt)
+		return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask];
+	else
+		return NULL;
+}
+
+/*
+ * Register qp under qpn, allocating the second-level table on first
+ * use.  Returns 0 on success and -1 if that allocation fails.
+ */
+int mlx4_store_qp(struct mlx4_context *ctx, uint32_t qpn, struct mlx4_qp *qp)
+{
+	int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
+	int ret = 0;
+
+	pthread_mutex_lock(&ctx->qp_table_mutex);
+
+	if (!ctx->qp_table[tind].refcnt) {
+		ctx->qp_table[tind].table = calloc(ctx->qp_table_mask + 1,
+						   sizeof (struct mlx4_qp *));
+		if (!ctx->qp_table[tind].table) {
+			ret = -1;
+			goto out;
+		}
+	}
+
+	++ctx->qp_table[tind].refcnt;
+	ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = qp;
+
+out:
+	pthread_mutex_unlock(&ctx->qp_table_mutex);
+	return ret;
+}
+
+/*
+ * Remove the entry for qpn.  The second-level table is freed when its
+ * last user goes away; the table pointer is not reset, so refcnt is
+ * what marks the table as absent.
+ */
+void mlx4_clear_qp(struct mlx4_context *ctx, uint32_t qpn)
+{
+	int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
+
+	pthread_mutex_lock(&ctx->qp_table_mutex);
+
+	if (!--ctx->qp_table[tind].refcnt)
+		free(ctx->qp_table[tind].table);
+	else
+		ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = NULL;
+
+	pthread_mutex_unlock(&ctx->qp_table_mutex);
+}
diff --git a/src/srq.c b/src/srq.c
new file mode 100644
index 0000000..14c64c8
--- /dev/null
+++ b/src/srq.c
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2007 Cisco, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if HAVE_CONFIG_H
+# include
+#endif /* HAVE_CONFIG_H */
+
+#include
+#include
+#include
+#include
+
+#include "mlx4.h"
+#include "doorbell.h"
+#include "wqe.h"
+
+/* Return the address of WQE number n inside the SRQ buffer. */
+static void *get_wqe(struct mlx4_srq *srq, int n)
+{
+	return srq->buf.buf + (n << srq->wqe_shift);
+}
+
+/*
+ * Return WQE number ind to the free list: the current tail is linked
+ * to it and it becomes the new tail.
+ */
+void mlx4_free_srq_wqe(struct mlx4_srq *srq, int ind)
+{
+	struct mlx4_wqe_srq_next_seg *next;
+
+	pthread_spin_lock(&srq->lock);
+
+	next = get_wqe(srq, srq->tail);
+	next->next_wqe_index = htons(ind);
+	srq->tail = ind;
+
+	pthread_spin_unlock(&srq->lock);
+}
+
+/*
+ * Post the list of receive work requests wr to the SRQ, taking WQEs
+ * from the head of the free list, and update the doorbell record.
+ *
+ * Returns 0 on success.  On failure returns -1 and stores the first
+ * request that could not be posted in *bad_wr; requests before it have
+ * been posted.  A request is rejected when it has more than max_gs
+ * scatter entries or when the SRQ is full.
+ */
+int mlx4_post_srq_recv(struct ibv_srq *ibsrq,
+		       struct ibv_recv_wr *wr,
+		       struct ibv_recv_wr **bad_wr)
+{
+	struct mlx4_srq *srq = to_msrq(ibsrq);
+	struct mlx4_wqe_srq_next_seg *next;
+	struct mlx4_wqe_data_seg *scat;
+	int err = 0;
+	int nreq;
+	int i;
+
+	pthread_spin_lock(&srq->lock);
+
+	for (nreq = 0; wr; ++nreq, wr = wr->next) {
+		if (wr->num_sge > srq->max_gs) {
+			err = -1;
+			*bad_wr = wr;
+			break;
+		}
+
+		/*
+		 * The tail WQE has no valid successor until another WQE
+		 * is freed behind it, so the SRQ is full once only the
+		 * tail is left; using it would follow a stale link.
+		 */
+		if (srq->head == srq->tail) {
+			err = -1;
+			*bad_wr = wr;
+			break;
+		}
+
+		srq->wrid[srq->head] = wr->wr_id;
+
+		next = get_wqe(srq, srq->head);
+		srq->head = ntohs(next->next_wqe_index);
+		/* The scatter list starts right after the next-WQE segment */
+		scat = (struct mlx4_wqe_data_seg *) (next + 1);
+
+		for (i = 0; i < wr->num_sge; ++i) {
+			scat[i].byte_count = htonl(wr->sg_list[i].length);
+			scat[i].lkey = htonl(wr->sg_list[i].lkey);
+			/* addr is 64 bits wide: htonl() would drop the upper half */
+			scat[i].addr = htonll(wr->sg_list[i].addr);
+		}
+
+		/* Terminate a short scatter list with a zero-length, invalid-lkey entry */
+		if (i < srq->max_gs) {
+			scat[i].byte_count = 0;
+			scat[i].lkey = htonl(MLX4_INVALID_LKEY);
+			scat[i].addr = 0;
+		}
+	}
+
+	if (nreq) {
+		srq->counter += nreq;
+
+		/*
+		 * Make sure that descriptors are written before
+		 * we write doorbell record.
+		 */
+		wmb();
+
+		*srq->db = htonl(srq->counter);
+	}
+
+	pthread_spin_unlock(&srq->lock);
+
+	return err;
+}
+
+/*
+ * Allocate the wrid array and the WQE buffer of an SRQ and link every
+ * WQE into the free list.
+ *
+ * srq->max and srq->max_gs must already be set by the caller.  Sets
+ * srq->wqe_shift, srq->head and srq->tail.  Returns 0 on success and
+ * -1 if an allocation fails, in which case nothing is left allocated.
+ */
+int mlx4_alloc_srq_buf(struct ibv_pd *pd, struct ibv_srq_attr *attr,
+		       struct mlx4_srq *srq)
+{
+	struct mlx4_wqe_srq_next_seg *next;
+	int size;
+	int buf_size;
+	int i;
+
+	srq->wrid = malloc(srq->max * sizeof (uint64_t));
+	if (!srq->wrid)
+		return -1;
+
+	size = sizeof (struct mlx4_wqe_srq_next_seg) +
+		srq->max_gs * sizeof (struct mlx4_wqe_data_seg);
+
+	/* WQE stride: smallest power of two >= size, at least 64 bytes */
+	for (srq->wqe_shift = 6; 1 << srq->wqe_shift < size; ++srq->wqe_shift)
+		; /* nothing */
+
+	buf_size = srq->max << srq->wqe_shift;
+
+	if (mlx4_alloc_buf(&srq->buf, buf_size,
+			   to_mdev(pd->context->device)->page_size)) {
+		free(srq->wrid);
+		return -1;
+	}
+
+	memset(srq->buf.buf, 0, buf_size);
+
+	/*
+	 * Now initialize the SRQ buffer so that all of the WQEs are
+	 * linked into the list of free WQEs.
+	 */
+
+	for (i = 0; i < srq->max; ++i) {
+		next = get_wqe(srq, i);
+
+		/*
+		 * Store the link in network byte order, matching the
+		 * htons()/ntohs() used everywhere else it is accessed.
+		 */
+		next->next_wqe_index = htons((i + 1) & (srq->max - 1));
+	}
+
+	srq->head = 0;
+	srq->tail = srq->max - 1;
+
+	return 0;
+}
diff --git a/src/verbs.c b/src/verbs.c
new file mode 100644
index 0000000..0292945
--- /dev/null
+++ b/src/verbs.c
@@ -0,0 +1,580 @@
+/*
+ * Copyright (c) 2007 Cisco, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include + +#include "mlx4.h" +#include "mlx4-abi.h" +#include "wqe.h" + +int mlx4_query_device(struct ibv_context *context, struct ibv_device_attr *attr) +{ + struct ibv_query_device cmd; + uint64_t raw_fw_ver; + unsigned major, minor, sub_minor; + int ret; + + ret = ibv_cmd_query_device(context, attr, &raw_fw_ver, &cmd, sizeof cmd); + if (ret) + return ret; + + major = (raw_fw_ver >> 32) & 0xffff; + minor = (raw_fw_ver >> 16) & 0xffff; + sub_minor = raw_fw_ver & 0xffff; + + snprintf(attr->fw_ver, sizeof attr->fw_ver, + "%d.%d.%03d", major, minor, sub_minor); + + return 0; +} + +int mlx4_query_port(struct ibv_context *context, uint8_t port, + struct ibv_port_attr *attr) +{ + struct ibv_query_port cmd; + + return ibv_cmd_query_port(context, port, attr, &cmd, sizeof cmd); +} + +struct ibv_pd *mlx4_alloc_pd(struct ibv_context *context) +{ + struct ibv_alloc_pd cmd; + struct mlx4_alloc_pd_resp resp; + struct mlx4_pd *pd; + + pd = malloc(sizeof *pd); + if (!pd) + return NULL; + + if (ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof cmd, + &resp.ibv_resp, sizeof resp)) { + free(pd); + return NULL; + } + + pd->pdn = resp.pdn; + + return 
&pd->ibv_pd; +} + +int mlx4_free_pd(struct ibv_pd *pd) +{ + int ret; + + ret = ibv_cmd_dealloc_pd(pd); + if (ret) + return ret; + + free(to_mpd(pd)); + return 0; +} + +struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr, size_t length, + enum ibv_access_flags access) +{ + struct ibv_mr *mr; + struct ibv_reg_mr cmd; + int ret; + + mr = malloc(sizeof *mr); + if (!mr) + return NULL; + +#ifdef IBV_CMD_REG_MR_HAS_RESP_PARAMS + { + struct ibv_reg_mr_resp resp; + + ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t) addr, + access, mr, &cmd, sizeof cmd, + &resp, sizeof resp); + } +#else + ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t) addr, access, mr, + &cmd, sizeof cmd); +#endif + if (ret) { + free(mr); + return NULL; + } + + return mr; +} + +int mlx4_dereg_mr(struct ibv_mr *mr) +{ + int ret; + + ret = ibv_cmd_dereg_mr(mr); + if (ret) + return ret; + + free(mr); + return 0; +} + +static int align_cq_size(int cqe) +{ + int nent; + + for (nent = 1; nent <= cqe; nent <<= 1) + ; /* nothing */ + + return nent; +} + +struct ibv_cq *mlx4_create_cq(struct ibv_context *context, int cqe, + struct ibv_comp_channel *channel, + int comp_vector) +{ + struct mlx4_create_cq cmd; + struct mlx4_create_cq_resp resp; + struct mlx4_cq *cq; + int ret; + + /* Sanity check CQ size before proceeding */ + if (cqe > 131072) + return NULL; + + cq = malloc(sizeof *cq); + if (!cq) + return NULL; + + cq->cons_index = 0; + + if (pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE)) + goto err; + + cqe = align_cq_size(cqe); + + if (mlx4_alloc_buf(&cq->buf, cqe * MLX4_CQ_ENTRY_SIZE, + to_mdev(context->device)->page_size)) + goto err; + + memset(cq->buf.buf, 0, cqe * MLX4_CQ_ENTRY_SIZE); + + cq->set_ci_db = mlx4_alloc_db(to_mctx(context), MLX4_DB_TYPE_CQ); + if (!cq->set_ci_db) + goto err_buf; + + cq->arm_db = cq->set_ci_db + 1; + *cq->arm_db = 0; + cq->arm_sn = 1; + *cq->set_ci_db = 0; + + cmd.buf_addr = (uintptr_t) cq->buf.buf; + cmd.db_addr = (uintptr_t) cq->set_ci_db; + + ret = 
ibv_cmd_create_cq(context, cqe - 1, channel, comp_vector, + &cq->ibv_cq, &cmd.ibv_cmd, sizeof cmd, + &resp.ibv_resp, sizeof resp); + if (ret) + goto err_db; + + cq->cqn = resp.cqn; + + return &cq->ibv_cq; + +err_db: + mlx4_free_db(to_mctx(context), cq->set_ci_db); + +err_buf: + mlx4_free_buf(&cq->buf); + +err: + free(cq); + + return NULL; +} + +int mlx4_resize_cq(struct ibv_cq *ibcq, int cqe) +{ + /* XXX resize CQ not implemented */ + return -ENOSYS; +} + +int mlx4_destroy_cq(struct ibv_cq *cq) +{ + int ret; + + ret = ibv_cmd_destroy_cq(cq); + if (ret) + return ret; + + mlx4_free_db(to_mctx(cq->context), to_mcq(cq)->set_ci_db); + mlx4_free_buf(&to_mcq(cq)->buf); + free(to_mcq(cq)); + + return 0; +} + +static int align_queue_size(struct ibv_context *context, int size, int spare) +{ + int ret; + + /* + * If someone asks for a 0-sized queue, presumably they're not + * going to use it. So don't mess with their size. + */ + if (!size) + return 0; + + for (ret = 1; ret < size + spare; ret <<= 1) + ; /* nothing */ + + return ret; +} + +struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd, + struct ibv_srq_init_attr *attr) +{ + struct mlx4_create_srq cmd; + struct mlx4_create_srq_resp resp; + struct mlx4_srq *srq; + int ret; + + /* Sanity check SRQ size before proceeding */ + if (attr->attr.max_wr > 1 << 16 || attr->attr.max_sge > 64) + return NULL; + + srq = malloc(sizeof *srq); + if (!srq) + return NULL; + + if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE)) + goto err; + + srq->max = align_queue_size(pd->context, attr->attr.max_wr, 1); + srq->max_gs = attr->attr.max_sge; + srq->counter = 0; + + if (mlx4_alloc_srq_buf(pd, &attr->attr, srq)) + goto err; + + srq->db = mlx4_alloc_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ); + if (!srq->db) + goto err_free; + + *srq->db = 0; + + cmd.buf_addr = (uintptr_t) srq->buf.buf; + cmd.db_addr = (uintptr_t) srq->db; + + ret = ibv_cmd_create_srq(pd, &srq->ibv_srq, attr, + &cmd.ibv_cmd, sizeof cmd, + &resp.ibv_resp, sizeof resp); + 
if (ret) + goto err_db; + + srq->srqn = resp.srqn; + + return &srq->ibv_srq; + +err_db: + mlx4_free_db(to_mctx(pd->context), srq->db); + +err_free: + free(srq->wrid); + mlx4_free_buf(&srq->buf); + +err: + free(srq); + + return NULL; +} + +int mlx4_modify_srq(struct ibv_srq *srq, + struct ibv_srq_attr *attr, + enum ibv_srq_attr_mask attr_mask) +{ + struct ibv_modify_srq cmd; + + return ibv_cmd_modify_srq(srq, attr, attr_mask, &cmd, sizeof cmd); +} + +int mlx4_query_srq(struct ibv_srq *srq, + struct ibv_srq_attr *attr) +{ + struct ibv_query_srq cmd; + + return ibv_cmd_query_srq(srq, attr, &cmd, sizeof cmd); +} + +int mlx4_destroy_srq(struct ibv_srq *srq) +{ + int ret; + + ret = ibv_cmd_destroy_srq(srq); + if (ret) + return ret; + + mlx4_free_db(to_mctx(srq->context), to_msrq(srq)->db); + mlx4_free_buf(&to_msrq(srq)->buf); + free(to_msrq(srq)->wrid); + free(to_msrq(srq)); + + return 0; +} + +struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr) +{ + struct mlx4_create_qp cmd; + struct ibv_create_qp_resp resp; + struct mlx4_qp *qp; + int ret; + + /* Sanity check QP size before proceeding */ + if (attr->cap.max_send_wr > 65536 || + attr->cap.max_recv_wr > 65536 || + attr->cap.max_send_sge > 64 || + attr->cap.max_recv_sge > 64 || + attr->cap.max_inline_data > 1024) + return NULL; + + qp = malloc(sizeof *qp); + if (!qp) + return NULL; + + qp->sq.max = align_queue_size(pd->context, attr->cap.max_send_wr, 0); + qp->rq.max = align_queue_size(pd->context, attr->cap.max_recv_wr, 0); + + if (mlx4_alloc_qp_buf(pd, &attr->cap, attr->qp_type, qp)) + goto err; + + mlx4_init_qp_indices(qp); + + if (pthread_spin_init(&qp->sq.lock, PTHREAD_PROCESS_PRIVATE) || + pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE)) + goto err_free; + + qp->db = mlx4_alloc_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ); + if (!qp->db) + goto err_free; + + *qp->db = 0; + + cmd.buf_addr = (uintptr_t) qp->buf.buf; + cmd.db_addr = (uintptr_t) qp->db; + + ret = ibv_cmd_create_qp(pd, 
&qp->ibv_qp, attr, &cmd.ibv_cmd, sizeof cmd, + &resp, sizeof resp); + if (ret) + goto err_rq_db; + + ret = mlx4_store_qp(to_mctx(pd->context), qp->ibv_qp.qp_num, qp); + if (ret) + goto err_destroy; + + qp->sq.max = attr->cap.max_send_wr; + qp->rq.max = attr->cap.max_recv_wr; + qp->sq.max_gs = attr->cap.max_send_sge; + qp->rq.max_gs = attr->cap.max_recv_sge; + qp->max_inline_data = attr->cap.max_inline_data; + + qp->doorbell_qpn = htonl(qp->ibv_qp.qp_num << 8); + if (attr->sq_sig_all) + qp->sq_signal_bits = htonl(MLX4_WQE_CTRL_CQ_UPDATE); + else + qp->sq_signal_bits = 0; + + return &qp->ibv_qp; + +err_destroy: + ibv_cmd_destroy_qp(&qp->ibv_qp); + +err_rq_db: + mlx4_free_db(to_mctx(pd->context), qp->db); + +err_free: + free(qp->sq.wrid); + free(qp->rq.wrid); + mlx4_free_buf(&qp->buf); + +err: + free(qp); + + return NULL; +} + +int mlx4_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + enum ibv_qp_attr_mask attr_mask, + struct ibv_qp_init_attr *init_attr) +{ + struct ibv_query_qp cmd; + + return ibv_cmd_query_qp(qp, attr, attr_mask, init_attr, &cmd, sizeof cmd); +} + +int mlx4_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + enum ibv_qp_attr_mask attr_mask) +{ + struct ibv_modify_qp cmd; + int ret; + + ret = ibv_cmd_modify_qp(qp, attr, attr_mask, &cmd, sizeof cmd); + + if (!ret && + (attr_mask & IBV_QP_STATE) && + attr->qp_state == IBV_QPS_RESET) { + mlx4_cq_clean(to_mcq(qp->recv_cq), qp->qp_num, + qp->srq ? 
to_msrq(qp->srq) : NULL); + /* A CQ shared by both directions was already handled by the call above. */ if (qp->send_cq != qp->recv_cq) + mlx4_cq_clean(to_mcq(qp->send_cq), qp->qp_num, NULL); + + mlx4_init_qp_indices(to_mqp(qp)); + } + + return ret; +} + /* Take the spinlocks of the QP's send and receive CQs. When both are the same CQ it is locked once; otherwise the CQ with the smaller cqn is locked first, so every caller acquires the two locks in the same order. */ +static void mlx4_lock_cqs(struct ibv_qp *qp) +{ + struct mlx4_cq *send_cq = to_mcq(qp->send_cq); + struct mlx4_cq *recv_cq = to_mcq(qp->recv_cq); + + if (send_cq == recv_cq) + pthread_spin_lock(&send_cq->lock); + else if (send_cq->cqn < recv_cq->cqn) { + pthread_spin_lock(&send_cq->lock); + pthread_spin_lock(&recv_cq->lock); + } else { + pthread_spin_lock(&recv_cq->lock); + pthread_spin_lock(&send_cq->lock); + } +} + /* Release the CQ spinlocks taken by mlx4_lock_cqs(). The same cqn comparison is used, and in each branch the locks are dropped in the reverse of the order in which mlx4_lock_cqs() takes them. */ +static void mlx4_unlock_cqs(struct ibv_qp *qp) +{ + struct mlx4_cq *send_cq = to_mcq(qp->send_cq); + struct mlx4_cq *recv_cq = to_mcq(qp->recv_cq); + + if (send_cq == recv_cq) + pthread_spin_unlock(&send_cq->lock); + else if (send_cq->cqn < recv_cq->cqn) { + pthread_spin_unlock(&recv_cq->lock); + pthread_spin_unlock(&send_cq->lock); + } else { + pthread_spin_unlock(&send_cq->lock); + pthread_spin_unlock(&recv_cq->lock); + } +} + /* Destroy a QP. mlx4_cq_clean() is run on the receive CQ (passing the SRQ if the QP has one) and on the send CQ if it is a different CQ; the QP number is then cleared with mlx4_clear_qp() while both CQ locks are held, and ibv_cmd_destroy_qp() is issued. If that command fails, the QP is stored again under the CQ locks and the error is returned; otherwise the db entry, wrid storage, buffer and the mlx4_qp are freed and 0 is returned. */ +int mlx4_destroy_qp(struct ibv_qp *ibqp) +{ + struct mlx4_qp *qp = to_mqp(ibqp); + int ret; + + mlx4_cq_clean(to_mcq(ibqp->recv_cq), ibqp->qp_num, + ibqp->srq ? 
to_msrq(ibqp->srq) : NULL); + /* A CQ shared by both directions was already handled by the call above. */ if (ibqp->send_cq != ibqp->recv_cq) + mlx4_cq_clean(to_mcq(ibqp->send_cq), ibqp->qp_num, NULL); + /* Clear the QP number before issuing the destroy command; both CQ locks are held around the update. */ + mlx4_lock_cqs(ibqp); + mlx4_clear_qp(to_mctx(ibqp->context), ibqp->qp_num); + mlx4_unlock_cqs(ibqp); + + ret = ibv_cmd_destroy_qp(ibqp); + if (ret) { + /* The destroy command failed, so the QP still exists: store it again under the same locks and report the error. */ mlx4_lock_cqs(ibqp); + mlx4_store_qp(to_mctx(ibqp->context), ibqp->qp_num, qp); + mlx4_unlock_cqs(ibqp); + + return ret; + } + /* Command succeeded: release everything the mlx4_qp references, then the structure itself. */ + mlx4_free_db(to_mctx(ibqp->context), qp->db); + free(qp->sq.wrid); + free(qp->rq.wrid); + mlx4_free_buf(&qp->buf); + free(qp); + + return 0; +} + /* Create an address handle from attr. The mlx4_ah is obtained with malloc() and its av member is filled in from attr and the PD number of pd; no ibv_cmd_*() call is made in this function. Returns &ah->ibv_ah, or NULL if malloc() fails. */ +struct ibv_ah *mlx4_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr) +{ + struct mlx4_ah *ah; + + ah = malloc(sizeof *ah); + if (!ah) + return NULL; + /* Only av is zeroed; fields not assigned below therefore stay 0. */ + memset(&ah->av, 0, sizeof ah->av); + /* port_pd combines the PD number with the port number shifted into bits 24 and up, converted with htonl(). */ + ah->av.port_pd = htonl(to_mpd(pd)->pdn | (attr->port_num << 24)); + ah->av.g_slid = attr->src_path_bits; + ah->av.dlid = htons(attr->dlid); + if (attr->static_rate) { + ah->av.stat_rate = attr->static_rate + MLX4_STAT_RATE_OFFSET; + /* XXX check rate cap? */ + } + /* The service level is placed in the top 4 bits (shift by 28) of sl_tclass_flowlabel. */ ah->av.sl_tclass_flowlabel = htonl(attr->sl << 28); + if (attr->is_global) { + /* Bit 7 of g_slid is set only on this is_global path, together with the GRH-derived fields below. */ ah->av.g_slid |= 0x80; + ah->av.gid_index = attr->grh.sgid_index; + ah->av.hop_limit = attr->grh.hop_limit; + ah->av.sl_tclass_flowlabel |= + htonl((attr->grh.traffic_class << 20) | + attr->grh.flow_label); + memcpy(ah->av.dgid, attr->grh.dgid.raw, 16); + } else { + /* XXX needed?? low byte of GID must be 2 */ + /* NOTE(review): presumably av.dgid is an array of four 32-bit words, so that dgid[3] = htonl(2) puts 2 in the last byte of the 16-byte GID; the declaration is not visible here - confirm against mlx4.h. */ ah->av.dgid[3] = htonl(2); + } + + return &ah->ibv_ah; +} + /* Destroy an address handle: frees the mlx4_ah obtained through to_mah(ah). Always returns 0. */ +int mlx4_destroy_ah(struct ibv_ah *ah) +{ + free(to_mah(ah)); + + return 0; +} + /* Attach qp to a multicast group: passes qp, gid and lid to ibv_cmd_attach_mcast() and returns its result unchanged. */ +int mlx4_attach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid) +{ + return ibv_cmd_attach_mcast(qp, gid, lid); +} + /* Detach qp from a multicast group: passes qp, gid and lid to ibv_cmd_detach_mcast() and returns its result unchanged. */ +int mlx4_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid) +{ + return ibv_cmd_detach_mcast(qp, gid, lid); +} diff --git a/src/wqe.h b/src/wqe.h new file mode 100644 index 0000000..b19a31a --- /dev/null +++ b/src/wqe.h @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2007 Cisco, Inc. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + /* Work queue entry (WQE) constants and segment layouts. This header uses the uintN_t types but contains no #include of its own, so the including file must already provide them (e.g. via <stdint.h>). */ +#ifndef WQE_H +#define WQE_H + +enum { + MLX4_SEND_DOORBELL = 0x14, +}; + /* Control flag bits. CQ_UPDATE (3 << 2) and SOLICIT (1 << 1) line up with the [3:2] "C" and [1] "SE" flags documented for srcrb_flags in struct mlx4_wqe_ctrl_seg; FENCE (1 << 6) presumably belongs to the fence_size byte - confirm. */ +enum { + MLX4_WQE_CTRL_FENCE = 1 << 6, + MLX4_WQE_CTRL_CQ_UPDATE = 3 << 2, + MLX4_WQE_CTRL_SOLICIT = 1 << 1, +}; + /* Bit 31; presumably set in byte_count of struct mlx4_wqe_inline_seg - confirm. NOTE(review): with a 32-bit int, 1 << 31 shifts into the sign bit, so the resulting value relies on compiler behaviour. */ +enum { + MLX4_INLINE_SEG = 1 << 31 +}; + +enum { + MLX4_INVALID_LKEY = 0x100, +}; + /* Control segment: owner/opcode word, three reserved bytes, a fence/size byte, the srcrb_flags word and the imm word described by the comments inside the struct. */ +struct mlx4_wqe_ctrl_seg { + uint32_t owner_opcode; + uint8_t reserved2[3]; + uint8_t fence_size; + /* + * High 24 bits are SRC remote buffer; low 8 bits are flags: + * [7] SO (strong ordering) + * [5] TCP/UDP checksum + * [4] IP checksum + * [3:2] C (generate completion queue entry) + * [1] SE (solicited event) + * [0] FL (force loopback) + */ + uint32_t srcrb_flags; + /* + * imm is immediate data for send/RDMA write w/ immediate; + * also invalidation key for send with invalidate; input + * modifier for WQEs on CCQs. + */ + uint32_t imm; +}; + /* Datagram segment: eight 32-bit words of av, followed by dqpn, qkey and two reserved words. */ +struct mlx4_wqe_datagram_seg { + uint32_t av[8]; + uint32_t dqpn; + uint32_t qkey; + uint32_t reserved[2]; +}; + /* Data segment: a byte count, an lkey and a 64-bit address. */ +struct mlx4_wqe_data_seg { + uint32_t byte_count; + uint32_t lkey; + uint64_t addr; +}; + /* Inline segment header: a single byte_count word. */ +struct mlx4_wqe_inline_seg { + uint32_t byte_count; +}; + /* SRQ "next" segment: a 16-bit next_wqe_index preceded by a reserved 16-bit field and followed by three reserved words. */ +struct mlx4_wqe_srq_next_seg { + uint16_t reserved1; + uint16_t next_wqe_index; + uint32_t reserved2[3]; +}; + +/* XXX the rest of these are still old WQE formats... */ +struct mlx4_bind_seg { + uint32_t flags; /* [31] Atomic [30] rem write [29] rem read */ + uint32_t reserved; + uint32_t new_rkey; + uint32_t lkey; + uint64_t addr; + uint64_t length; +}; + /* Remote address segment: 64-bit raddr, rkey and one reserved word. */ +struct mlx4_raddr_seg { + uint64_t raddr; + uint32_t rkey; + uint32_t reserved; +}; + /* Atomic segment: two 64-bit operands, swap_add and compare. */ +struct mlx4_atomic_seg { + uint64_t swap_add; + uint64_t compare; +}; + +#endif /* WQE_H */ -- 2.46.0