From ede311583f519c791f3854301675b2beb1406971 Mon Sep 17 00:00:00 2001 From: Mahesh Vardhamanaiah Date: Tue, 30 Jul 2013 11:09:16 -0700 Subject: [PATCH] Added libocrdma files --- AUTHORS | 1 + COPYING | 282 ++++++ Changelog | 0 Makefile.am | 23 + README | 19 + autogen.sh | 8 + config/.gitignore | 8 + configure.in | 68 ++ libocrdma.spec.in | 54 ++ ocrdma.driver | 1 + src/ocrdma.map | 5 + src/ocrdma_abi.h | 354 +++++++ src/ocrdma_list.h | 104 +++ src/ocrdma_main.c | 259 ++++++ src/ocrdma_main.h | 301 ++++++ src/ocrdma_verbs.c | 2198 ++++++++++++++++++++++++++++++++++++++++++++ 16 files changed, 3685 insertions(+) create mode 100644 AUTHORS create mode 100644 COPYING create mode 100644 Changelog create mode 100644 Makefile.am create mode 100644 README create mode 100644 autogen.sh create mode 100644 config/.gitignore create mode 100644 configure.in create mode 100644 libocrdma.spec.in create mode 100644 ocrdma.driver create mode 100644 src/ocrdma.map create mode 100644 src/ocrdma_abi.h create mode 100644 src/ocrdma_list.h create mode 100644 src/ocrdma_main.c create mode 100644 src/ocrdma_main.h create mode 100644 src/ocrdma_verbs.c diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..0acfd9d --- /dev/null +++ b/AUTHORS @@ -0,0 +1 @@ +Emulex Corporation. diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..f46bbb1 --- /dev/null +++ b/COPYING @@ -0,0 +1,282 @@ + + + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. 
This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. 
We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. 
+ +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. 
But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) 
+ +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. 
Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. 
Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. 
+ + END OF TERMS AND CONDITIONS diff --git a/Changelog b/Changelog new file mode 100644 index 0000000..e69de29 diff --git a/Makefile.am b/Makefile.am new file mode 100644 index 0000000..c07f38c --- /dev/null +++ b/Makefile.am @@ -0,0 +1,23 @@ + +lib_LTLIBRARIES = src/libocrdma.la + +AM_CFLAGS = -Wall -D_GNU_SOURCE + +if HAVE_LD_VERSION_SCRIPT + ocrdma_version_script = -Wl,--version-script=$(srcdir)/src/ocrdma.map +else + ocrdma_version_script = +endif + +src_libocrdma_la_SOURCES = src/ocrdma_main.c src/ocrdma_verbs.c +src_libocrdma_la_LDFLAGS = -avoid-version -module -release @IBV_DEVICE_LIBRARY_EXTENSION@\ + $(ocrdma_version_script) + +ocrdmaconfdir = $(sysconfdir)/libibverbs.d +ocrdmaconf_DATA = ocrdma.driver + +EXTRA_DIST = src/ocrdma.map src/ocrdma_main.h src/ocrdma_abi.h \ + src/ocrdma_list.h libocrdma.spec.in ocrdma.driver + +dist-hook: libocrdma.spec + cp libocrdma.spec $(distdir) diff --git a/README b/README new file mode 100644 index 0000000..6ae30fe --- /dev/null +++ b/README @@ -0,0 +1,19 @@ +Introduction +============ + +libocrdma is a userspace driver for Emulex OneConnect RDMA Adapters. +It is a plug-in module for libibverbs that allows programs to use +Emulex RDMA hardware directly from userspace. See the libibverbs +package for more information. + +Using libocrdma +=============== + +libocrdma will be loaded and used automatically by programs linked +with libibverbs. The ocrdma kernel modules must be loaded for RDMA +devices to be detected and used. + +Supported Hardware +================== + +libocrdma supports all RDMA capable Emulex adapters. diff --git a/autogen.sh b/autogen.sh new file mode 100644 index 0000000..fd47839 --- /dev/null +++ b/autogen.sh @@ -0,0 +1,8 @@ +#! 
/bin/sh
+
+set -x
+aclocal -I config
+libtoolize --force --copy
+autoheader
+automake --foreign --add-missing --copy
+autoconf
diff --git a/config/.gitignore b/config/.gitignore
new file mode 100644
index 0000000..4d4c7b1
--- /dev/null
+++ b/config/.gitignore
@@ -0,0 +1,8 @@
+mkinstalldirs
+depcomp
+compile
+missing
+config.guess
+config.sub
+ltmain.sh
+install-sh
diff --git a/configure.in b/configure.in
new file mode 100644
index 0000000..24af2a4
--- /dev/null
+++ b/configure.in
@@ -0,0 +1,68 @@
+dnl Process this file with autoconf to produce a configure script.
+
+AC_PREREQ(2.57)
+AC_INIT(libocrdma, 1.3.0, linux-rdma@vger.kernel.org)
+AC_CONFIG_SRCDIR([src/ocrdma_main.h])
+AC_CONFIG_AUX_DIR(config)
+AM_CONFIG_HEADER(config.h)
+AM_INIT_AUTOMAKE(libocrdma, 1.0.0)
+AM_PROG_LIBTOOL
+
+AC_ARG_ENABLE(libcheck, [  --disable-libcheck      do not test for the presence of ib libraries],
+[ if test x$enableval = xno ; then
+        disable_libcheck=yes
+  fi
+])
+
+dnl Checks for programs
+AC_PROG_CC
+AC_CHECK_SIZEOF(long)
+
+dnl Checks for libraries
+if test "$disable_libcheck" != "yes"
+then
+AC_CHECK_LIB(ibverbs, ibv_get_device_list, [],
+    AC_MSG_ERROR([ibv_get_device_list() not found.  libocrdma requires libibverbs.]))
+fi
+
+dnl Checks for header files.
+AC_CHECK_HEADERS(sysfs/libsysfs.h)
+
+if test "$disable_libcheck" != "yes"
+then
+AC_CHECK_HEADER(infiniband/driver.h, [],
+    AC_MSG_ERROR([<infiniband/driver.h> not found.  Is libibverbs installed?]))
+AC_HEADER_STDC
+fi
+
+dnl Checks for typedefs, structures, and compiler characteristics.
+AC_C_CONST
+
+dnl Checks for library functions
+AC_CHECK_FUNCS(ibv_read_sysfs_file ibv_register_driver)
+
+dnl Check for libibverbs device library extension
+dummy=if$$
+cat <<IBV_VERSION > $dummy.c
+#include <infiniband/driver.h>
+IBV_DEVICE_LIBRARY_EXTENSION
+IBV_VERSION
+IBV_DEVICE_LIBRARY_EXTENSION=`$CC $CPPFLAGS -E $dummy.c 2> /dev/null | tail -1`
+rm -f $dummy.c
+if test $IBV_DEVICE_LIBRARY_EXTENSION = IBV_DEVICE_LIBRARY_EXTENSION; then
+    AC_MSG_ERROR([IBV_DEVICE_LIBRARY_EXTENSION not defined.
Is libibverbs new enough?])
+fi
+AC_SUBST(IBV_DEVICE_LIBRARY_EXTENSION)
+
+AC_CACHE_CHECK(whether ld accepts --version-script, ac_cv_version_script,
+    if test -n "`$LD --help < /dev/null 2>/dev/null | grep version-script`"; then
+        ac_cv_version_script=yes
+    else
+        ac_cv_version_script=no
+    fi)
+
+AM_CONDITIONAL(HAVE_LD_VERSION_SCRIPT, test "$ac_cv_version_script" = "yes")
+AM_CONDITIONAL(FMODEL, test "$FMODEL" = "1")
+
+AC_CONFIG_FILES([Makefile libocrdma.spec])
+AC_OUTPUT
diff --git a/libocrdma.spec.in b/libocrdma.spec.in
new file mode 100644
index 0000000..2e74263
--- /dev/null
+++ b/libocrdma.spec.in
@@ -0,0 +1,54 @@
+%define ver @VERSION@
+
+Name: libocrdma
+Version: 0.0.1
+Release: 1%{?dist}
+Summary: Emulex OneConnect SLI4 Compliant RDMA Open Fabrics Userspace Library
+
+Group: System Environment/Libraries
+License: GPL/BSD
+Url: http://www.openfabrics.org/
+Source: http://www.openfabrics.org/downloads/ocrdma/%{name}-%{ver}.tar.gz
+BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
+
+BuildRequires: libibverbs-devel
+
+%description
+libocrdma provides a device-specific userspace driver for Emulex OneConnect RDMA Adapters for use with the libibverbs library.
+
+%package devel
+Summary: Development files for the libocrdma driver
+Group: System Environment/Libraries
+Requires: %{name} = %{version}-%{release}
+
+%description devel
+Static version of libocrdma that may be linked directly to an
+application, which may be useful for debugging.
+ +%prep +%setup -q -n %{name}-%{ver} + +%build +%configure +make %{?_smp_mflags} + +%install +rm -rf $RPM_BUILD_ROOT +%makeinstall +# remove unpackaged files from the buildroot +rm -f $RPM_BUILD_ROOT%{_libdir}/*.la + +%clean +rm -rf $RPM_BUILD_ROOT + +%files +%defattr(-,root,root,-) +%{_libdir}/libocrdma*.so +%doc AUTHORS COPYING README +%config %{_sysconfdir}/libibverbs.d/ocrdma.driver + +%files devel +%defattr(-,root,root,-) +%{_libdir}/libocrdma*.a + +%changelog diff --git a/ocrdma.driver b/ocrdma.driver new file mode 100644 index 0000000..dead9f8 --- /dev/null +++ b/ocrdma.driver @@ -0,0 +1 @@ +driver ocrdma diff --git a/src/ocrdma.map b/src/ocrdma.map new file mode 100644 index 0000000..ae8ed86 --- /dev/null +++ b/src/ocrdma.map @@ -0,0 +1,5 @@ +{ + global: + openib_driver_init; + local: *; +}; diff --git a/src/ocrdma_abi.h b/src/ocrdma_abi.h new file mode 100644 index 0000000..12ce74b --- /dev/null +++ b/src/ocrdma_abi.h @@ -0,0 +1,354 @@ +/* + * Copyright (C) 2008-2013 Emulex. All rights reserved. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __OCRDMA_ABI_H__
+#define __OCRDMA_ABI_H__
+
+#include <infiniband/kern-abi.h>
+
+#define Bit(_b) (1 << (_b))
+
+#define OCRDMA_MAX_QP 2048
+
+enum {
+	OCRDMA_DB_RQ_OFFSET = 0xE0,
+	OCRDMA_DB_SQ_OFFSET = 0x60,
+	OCRDMA_DB_SRQ_OFFSET = OCRDMA_DB_RQ_OFFSET,
+	OCRDMA_DB_CQ_OFFSET = 0x120
+};
+
+#define OCRDMA_DB_CQ_RING_ID_MASK 0x3FF	/* bits 0 - 9 */
+#define OCRDMA_DB_CQ_RING_ID_EXT_MASK 0x0C00	/* bits 10-11 */
+#define OCRDMA_DB_CQ_RING_ID_EXT_MASK_SHIFT 0x1	/* bits 12-11 */
+#define OCRDMA_DB_CQ_NUM_POPPED_SHIFT (16)	/* bits 16 - 28 */
+/* Rearm bit */
+#define OCRDMA_DB_CQ_REARM_SHIFT (29)	/* bit 29 */
+
+/* solicited bit */
+#define OCRDMA_DB_CQ_SOLICIT_SHIFT (31)	/* bit 31 */
+
+struct ocrdma_get_context {
+	struct ibv_get_context cmd;
+};
+
+struct ocrdma_alloc_ucontext_resp {
+	struct ibv_get_context_resp ibv_resp;
+	uint32_t dev_id;
+	uint32_t wqe_size;
+	uint32_t max_inline_data;
+	uint32_t dpp_wqe_size;
+	uint64_t ah_tbl_page;
+	uint32_t ah_tbl_len;
+	uint32_t rqe_size;
+	uint8_t fw_ver[32];
+	uint32_t rsvd1;
+	uint64_t rsvd2;
+};
+
+struct ocrdma_alloc_pd_req {
+	struct ibv_alloc_pd cmd;
+	uint64_t rsvd;
+};
+
+struct ocrdma_alloc_pd_resp {
+	struct ibv_alloc_pd_resp
ibv_resp; + uint32_t id; + uint32_t dpp_enabled; + uint32_t dpp_page_addr_hi; + uint32_t dpp_page_addr_lo; + uint64_t rsvd; +}; + +struct ocrdma_create_cq_req { + struct ibv_create_cq ibv_cmd; + uint32_t dpp_cq; + uint32_t rsvd; +}; + +#define MAX_CQ_PAGES 8 +struct ocrdma_create_cq_resp { + struct ibv_create_cq_resp ibv_resp; + uint32_t cq_id; + uint32_t size; + uint32_t num_pages; + uint32_t max_hw_cqe; + uint64_t page_addr[MAX_CQ_PAGES]; + uint64_t db_page_addr; + uint32_t db_page_size; + uint32_t phase_change; + uint64_t rsvd1; + uint64_t rsvd2; +}; + +struct ocrdma_reg_mr { + struct ibv_reg_mr ibv_cmd; +}; + +struct ocrdma_reg_mr_resp { + struct ibv_reg_mr_resp ibv_resp; +}; + +struct ocrdma_create_qp_cmd { + struct ibv_create_qp ibv_cmd; + uint8_t enable_dpp_cq; + uint8_t rsvd; + uint16_t dpp_cq_id; + uint32_t rsvd1; /* pad */ +}; + +#define MAX_QP_PAGES 8 +#define MAX_UD_HDR_PAGES 8 + +struct ocrdma_create_qp_uresp { + struct ibv_create_qp_resp ibv_resp; + uint16_t qp_id; + uint16_t sq_dbid; + uint16_t rq_dbid; + uint16_t resv0; /* pad */ + uint32_t sq_page_size; + uint32_t rq_page_size; + uint32_t num_sq_pages; + uint32_t num_rq_pages; + uint64_t sq_page_addr[MAX_QP_PAGES]; + uint64_t rq_page_addr[MAX_QP_PAGES]; + uint64_t db_page_addr; + uint32_t db_page_size; + uint32_t dpp_credit; + uint32_t dpp_offset; + uint32_t num_wqe_allocated; + uint32_t num_rqe_allocated; + uint32_t db_sq_offset; + uint32_t db_rq_offset; + uint32_t db_shift; + uint64_t rsvd2; + uint64_t rsvd3; +}; + +struct ocrdma_create_srq_cmd { + struct ibv_create_srq ibv_cmd; +}; + +struct ocrdma_create_srq_resp { + struct ibv_create_srq_resp ibv_resp; + uint16_t rq_dbid; + uint16_t resv0; + uint32_t resv1; + + uint32_t rq_page_size; + uint32_t num_rq_pages; + + uint64_t rq_page_addr[MAX_QP_PAGES]; + uint64_t db_page_addr; + + uint32_t db_page_size; + uint32_t num_rqe_allocated; + uint32_t db_rq_offset; + uint32_t db_shift; + uint64_t rsvd2; + uint64_t rsvd3; +}; + +enum OCRDMA_CQE_STATUS { + 
OCRDMA_CQE_SUCCESS = 0, + OCRDMA_CQE_LOC_LEN_ERR = 1, + OCRDMA_CQE_LOC_QP_OP_ERR = 2, + OCRDMA_CQE_LOC_EEC_OP_ERR = 3, + OCRDMA_CQE_LOC_PROT_ERR = 4, + OCRDMA_CQE_WR_FLUSH_ERR = 5, + OCRDMA_CQE_MW_BIND_ERR = 6, + OCRDMA_CQE_BAD_RESP_ERR = 7, + OCRDMA_CQE_LOC_ACCESS_ERR = 8, + OCRDMA_CQE_REM_INV_REQ_ERR = 9, + OCRDMA_CQE_REM_ACCESS_ERR = 0xa, + OCRDMA_CQE_REM_OP_ERR = 0xb, + OCRDMA_CQE_RETRY_EXC_ERR = 0xc, + OCRDMA_CQE_RNR_RETRY_EXC_ERR = 0xd, + OCRDMA_CQE_LOC_RDD_VIOL_ERR = 0xe, + OCRDMA_CQE_REM_INV_RD_REQ_ERR = 0xf, + OCRDMA_CQE_REM_ABORT_ERR = 0x10, + OCRDMA_CQE_INV_EECN_ERR = 0x11, + OCRDMA_CQE_INV_EEC_STATE_ERR = 0x12, + OCRDMA_CQE_FATAL_ERR = 0x13, + OCRDMA_CQE_RESP_TIMEOUT_ERR = 0x14, + OCRDMA_CQE_GENERAL_ERR +}; + +enum { + /* w0 */ + OCRDMA_CQE_WQEIDX_SHIFT = 0, + OCRDMA_CQE_WQEIDX_MASK = 0xFFFF, + + /* w1 */ + OCRDMA_CQE_UD_XFER_LEN_SHIFT = 16, + OCRDMA_CQE_PKEY_SHIFT = 0, + OCRDMA_CQE_PKEY_MASK = 0xFFFF, + + /* w2 */ + OCRDMA_CQE_QPN_SHIFT = 0, + OCRDMA_CQE_QPN_MASK = 0x0000FFFF, + + OCRDMA_CQE_BUFTAG_SHIFT = 16, + OCRDMA_CQE_BUFTAG_MASK = 0xFFFF << OCRDMA_CQE_BUFTAG_SHIFT, + + /* w3 */ + OCRDMA_CQE_UD_STATUS_SHIFT = 24, + OCRDMA_CQE_UD_STATUS_MASK = 0x7 << OCRDMA_CQE_UD_STATUS_SHIFT, + OCRDMA_CQE_STATUS_SHIFT = 16, + OCRDMA_CQE_STATUS_MASK = (0xFF << OCRDMA_CQE_STATUS_SHIFT), + OCRDMA_CQE_VALID = Bit(31), + OCRDMA_CQE_INVALIDATE = Bit(30), + OCRDMA_CQE_QTYPE = Bit(29), + OCRDMA_CQE_IMM = Bit(28), + OCRDMA_CQE_WRITE_IMM = Bit(27), + OCRDMA_CQE_QTYPE_SQ = 0, + OCRDMA_CQE_QTYPE_RQ = 1, + OCRDMA_CQE_SRCQP_MASK = 0xFFFFFF +}; + +struct ocrdma_cqe { + union { + /* w0 to w2 */ + struct { + uint32_t wqeidx; + uint32_t bytes_xfered; + uint32_t qpn; + } wq; + struct { + uint32_t lkey_immdt; + uint32_t rxlen; + uint32_t buftag_qpn; + } rq; + struct { + uint32_t lkey_immdt; + uint32_t rxlen_pkey; + uint32_t buftag_qpn; + } ud; + struct { + uint32_t word_0; + uint32_t word_1; + uint32_t qpn; + } cmn; + }; + uint32_t flags_status_srcqpn; /* w3 */ +} __attribute__ 
((packed)); + +struct ocrdma_sge { + uint32_t addr_hi; + uint32_t addr_lo; + uint32_t lrkey; + uint32_t len; +} __attribute__ ((packed)); + +enum { + OCRDMA_WQE_OPCODE_SHIFT = 0, + OCRDMA_WQE_OPCODE_MASK = 0x0000001F, + OCRDMA_WQE_FLAGS_SHIFT = 5, + OCRDMA_WQE_TYPE_SHIFT = 16, + OCRDMA_WQE_TYPE_MASK = 0x00030000, + OCRDMA_WQE_SIZE_SHIFT = 18, + OCRDMA_WQE_SIZE_MASK = 0xFF, + OCRDMA_WQE_NXT_WQE_SIZE_SHIFT = 25, + OCRDMA_WQE_LKEY_FLAGS_SHIFT = 0, + OCRDMA_WQE_LKEY_FLAGS_MASK = 0xF +}; + +enum { + OCRDMA_FLAG_SIG = 0x1, + OCRDMA_FLAG_INV = 0x2, + OCRDMA_FLAG_FENCE_L = 0x4, + OCRDMA_FLAG_FENCE_R = 0x8, + OCRDMA_FLAG_SOLICIT = 0x10, + OCRDMA_FLAG_IMM = 0x20, + + /* Stag flags */ + OCRDMA_LKEY_FLAG_LOCAL_WR = 0x1, + OCRDMA_LKEY_FLAG_REMOTE_RD = 0x2, + OCRDMA_LKEY_FLAG_REMOTE_WR = 0x4, + OCRDMA_LKEY_FLAG_VATO = 0x8 +}; + +enum { + OCRDMA_TYPE_INLINE = 0x0, + OCRDMA_TYPE_LKEY = 0x1 +}; + +#define OCRDMA_CQE_QTYPE_RQ 1 +#define OCRDMA_CQE_QTYPE_SQ 0 + +enum OCRDMA_WQE_OPCODE { + OCRDMA_WRITE = 0x06, + OCRDMA_READ = 0x0C, + OCRDMA_RESV0 = 0x02, + OCRDMA_SEND = 0x00, + OCRDMA_BIND_MW = 0x08, + OCRDMA_RESV1 = 0x0A, + OCRDMA_LKEY_INV = 0x15, +}; + +#define OCRDMA_WQE_STRIDE 8 +#define OCRDMA_WQE_ALIGN_BYTES 16 +/* header WQE for all the SQ and RQ operations */ +struct ocrdma_hdr_wqe { + uint32_t cw; + union { + uint32_t rsvd_tag; + uint32_t rsvd_stag_flags; + }; + union { + uint32_t immdt; + uint32_t lkey; + }; + uint32_t total_len; +} __attribute__ ((packed)); + +struct ocrdma_ewqe_atomic { + uint32_t ra_hi; + uint32_t ra_lo; + uint32_t rkey; + uint32_t rlen; + uint32_t swap_add_hi; + uint32_t swap_add_lo; + uint32_t compare_hi; + uint32_t compare_lo; + struct ocrdma_sge sge; +} __attribute__ ((packed)); + +struct ocrdma_ewqe_ud_hdr { + uint32_t rsvd_dest_qpn; + uint32_t qkey; + uint32_t rsvd_ahid; + uint32_t rsvd; +} __attribute__ ((packed)); + +#endif /* __OCRDMA_ABI_H__ */ diff --git a/src/ocrdma_list.h b/src/ocrdma_list.h new file mode 100644 index 0000000..11d56ce --- 
/dev/null +++ b/src/ocrdma_list.h @@ -0,0 +1,104 @@ +/* + * Copyright (C) 2008-2013 Emulex. All rights reserved. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
#ifndef __OCRDMA_LIST_H__
#define __OCRDMA_LIST_H__

#include <stddef.h>	/* offsetof */

/*
 * Minimal circular doubly-linked list.  A list head embeds a sentinel
 * node plus a mutex; the lock is NOT taken by the add/del primitives
 * themselves - callers serialize with list_lock()/list_unlock().
 */

struct ocrdma_list_node {
	struct ocrdma_list_node *next, *prev;
};

struct ocrdma_list_head {
	struct ocrdma_list_node node;	/* sentinel; empty when it points to itself */
	pthread_mutex_t lock;
};

#define DBLY_LIST_HEAD_INIT(name) { { &(name.node), &(name.node) }, \
				    PTHREAD_MUTEX_INITIALIZER }

#define DBLY_LIST_HEAD(name) \
	struct ocrdma_list_head name = DBLY_LIST_HEAD_INIT(name)

#define INIT_DBLY_LIST_NODE(ptr) do { \
	(ptr)->next = (ptr); (ptr)->prev = (ptr); \
} while (0)

/* NOTE: 'ptr' must be a *pointer* to the head; "ptr.node" relies on '.'
 * binding tighter than the caller's '&' (e.g. &cq->sq_head.node). */
#define INIT_DBLY_LIST_HEAD(ptr) INIT_DBLY_LIST_NODE(ptr.node)

/* Splice 'new' between 'prev' and 'next'. */
static inline void __list_add_node(struct ocrdma_list_node *new,
				   struct ocrdma_list_node *prev,
				   struct ocrdma_list_node *next)
{
	next->prev = new;
	new->next = next;
	new->prev = prev;
	prev->next = new;
}

/* Append 'new' at the tail (just before the sentinel). */
static inline void list_add_node_tail(struct ocrdma_list_node *new,
				      struct ocrdma_list_head *head)
{
	__list_add_node(new, head->node.prev, &head->node);
}

static inline void __list_del_node(struct ocrdma_list_node *prev,
				   struct ocrdma_list_node *next)
{
	next->prev = prev;
	prev->next = next;
}

/* Unlink 'entry' and poison its links so a stale reuse faults early. */
static inline void list_del_node(struct ocrdma_list_node *entry)
{
	__list_del_node(entry->prev, entry->next);
	entry->next = entry->prev = NULL;	/* was the integer 0 */
}

#define list_lock(head)   pthread_mutex_lock(&((head)->lock))
#define list_unlock(head) pthread_mutex_unlock(&((head)->lock))

/* container_of: recover the enclosing object from its embedded node.
 * Uses offsetof() instead of the null-pointer-deref cast, which is
 * undefined behavior. */
#define list_node(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/**
 * list_for_each_node_safe - iterate over a list safe against removal of
 * the current entry.
 * @pos: the &struct ocrdma_list_node to use as a loop cursor.
 * @n: another &struct ocrdma_list_node used as temporary storage.
 * @head: the head of the list.
 */
#define list_for_each_node_safe(pos, n, head) \
	for (pos = (head)->node.next, n = pos->next; pos != &((head)->node); \
	     pos = n, n = pos->next)

#endif /* __OCRDMA_LIST_H__ */
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#if HAVE_CONFIG_H +#include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include + +#include "ocrdma_main.h" +#include "ocrdma_abi.h" +#include "ocrdma_list.h" + +#include +#include +#include + +#define PCI_VENDOR_ID_EMULEX 0x10DF +#define PCI_DEVICE_ID_EMULEX_GEN1 0xe220 +#define PCI_DEVICE_ID_EMULEX_GEN2 0x720 +#define PCI_DEVICE_ID_EMULEX_GEN2_VF 0x728 + +#define UCNA(v, d) \ + { .vendor = PCI_VENDOR_ID_##v, \ + .device = PCI_DEVICE_ID_EMULEX_##d } + +struct { + unsigned vendor; + unsigned device; +} ucna_table[] = { + UCNA(EMULEX, GEN1), UCNA(EMULEX, GEN2), UCNA(EMULEX, GEN2_VF) +}; + +static DBLY_LIST_HEAD(ocrdma_dev_list); + +static struct ibv_context *ocrdma_alloc_context(struct ibv_device *, int); +static void ocrdma_free_context(struct ibv_context *); + +static struct ibv_context_ops ocrdma_ctx_ops = { + .query_device = ocrdma_query_device, + .query_port = ocrdma_query_port, + .alloc_pd = ocrdma_alloc_pd, + .dealloc_pd = ocrdma_free_pd, + .reg_mr = ocrdma_reg_mr, + .dereg_mr = ocrdma_dereg_mr, + .create_cq = ocrdma_create_cq, + .poll_cq = ocrdma_poll_cq, + .req_notify_cq = ocrdma_arm_cq, + .cq_event = ocrdma_cq_handler, + .resize_cq = ocrdma_resize_cq, + .destroy_cq = ocrdma_destroy_cq, + + .create_qp = ocrdma_create_qp, + .query_qp = ocrdma_query_qp, + .modify_qp = ocrdma_modify_qp, + .destroy_qp = ocrdma_destroy_qp, + .post_send = ocrdma_post_send, + .post_recv = 
ocrdma_post_recv, + .create_ah = ocrdma_create_ah, + .destroy_ah = ocrdma_destroy_ah, + .async_event = ocrdma_async_event, + + .create_srq = ocrdma_create_srq, + .modify_srq = ocrdma_modify_srq, + .query_srq = ocrdma_query_srq, + .destroy_srq = ocrdma_destroy_srq, + .post_srq_recv = ocrdma_post_srq_recv, + .attach_mcast = ocrdma_attach_mcast, + .detach_mcast = ocrdma_detach_mcast +}; + +static struct ibv_device_ops ocrdma_dev_ops = { + .alloc_context = ocrdma_alloc_context, + .free_context = ocrdma_free_context +}; + +/* + * ocrdma_alloc_context + */ +static struct ibv_context *ocrdma_alloc_context(struct ibv_device *ibdev, + int cmd_fd) +{ + struct ocrdma_devctx *ctx; + struct ocrdma_get_context cmd; + struct ocrdma_alloc_ucontext_resp resp; + + ctx = calloc(1, sizeof(struct ocrdma_devctx)); + if (!ctx) + return NULL; + memset(&resp, 0, sizeof(resp)); + + ctx->ibv_ctx.cmd_fd = cmd_fd; + + if (ibv_cmd_get_context(&ctx->ibv_ctx, + (struct ibv_get_context *)&cmd, sizeof cmd, + &resp.ibv_resp, sizeof(resp))) + goto cmd_err; + + ctx->ibv_ctx.device = ibdev; + ctx->ibv_ctx.ops = ocrdma_ctx_ops; + get_ocrdma_dev(ibdev)->id = resp.dev_id; + get_ocrdma_dev(ibdev)->max_inline_data = resp.max_inline_data; + get_ocrdma_dev(ibdev)->wqe_size = resp.wqe_size; + get_ocrdma_dev(ibdev)->rqe_size = resp.rqe_size; + memcpy(get_ocrdma_dev(ibdev)->fw_ver, resp.fw_ver, sizeof(resp.fw_ver)); + get_ocrdma_dev(ibdev)->dpp_wqe_size = resp.dpp_wqe_size; + + ctx->ah_tbl = + mmap(NULL, resp.ah_tbl_len, PROT_READ | PROT_WRITE, MAP_SHARED, + cmd_fd, resp.ah_tbl_page); + + if (ctx->ah_tbl == MAP_FAILED) + goto cmd_err; + ctx->ah_tbl_len = resp.ah_tbl_len; + ocrdma_init_ahid_tbl(ctx); + + return &ctx->ibv_ctx; + +cmd_err: + ocrdma_err("%s: Failed to allocate context for device.\n", __func__); + free(ctx); + return NULL; +} + +/* + * ocrdma_free_context + */ +static void ocrdma_free_context(struct ibv_context *ibctx) +{ + struct ocrdma_devctx *ctx = get_ocrdma_ctx(ibctx); + + if (ctx->ah_tbl) + 
munmap((void *)ctx->ah_tbl, ctx->ah_tbl_len); + + free(ctx); +} + +/** + * ocrdma_driver_init + */ +struct ibv_device *ocrdma_driver_init(const char *uverbs_sys_path, + int abi_version) +{ + + char value[16]; + struct ocrdma_device *dev; + unsigned vendor, device; + int i; + + if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor", + value, sizeof(value)) < 0) { + return NULL; + } + sscanf(value, "%i", &vendor); + + if (ibv_read_sysfs_file(uverbs_sys_path, "device/device", + value, sizeof(value)) < 0) { + return NULL; + } + sscanf(value, "%i", &device); + + for (i = 0; i < sizeof ucna_table / sizeof ucna_table[0]; ++i) { + if (vendor == ucna_table[i].vendor && + device == ucna_table[i].device) + goto found; + } + return NULL; +found: + dev = malloc(sizeof *dev); + if (!dev) { + ocrdma_err("%s() Fatal: fail allocate device for libocrdma\n", + __func__); + return NULL; + } + bzero(dev, sizeof *dev); + dev->qp_tbl = malloc(OCRDMA_MAX_QP * sizeof(struct ocrdma_qp *)); + if (!dev->qp_tbl) + goto qp_err; + bzero(dev->qp_tbl, OCRDMA_MAX_QP * sizeof(struct ocrdma_qp *)); + pthread_mutex_init(&dev->dev_lock, NULL); + pthread_spin_init(&dev->flush_q_lock, PTHREAD_PROCESS_PRIVATE); + dev->ibv_dev.ops = ocrdma_dev_ops; + INIT_DBLY_LIST_NODE(&dev->entry); + list_lock(&ocrdma_dev_list); + list_add_node_tail(&dev->entry, &ocrdma_dev_list); + list_unlock(&ocrdma_dev_list); + return &dev->ibv_dev; +qp_err: + free(dev); + return NULL; +} + +/* + * ocrdma_register_driver + */ +static __attribute__ ((constructor)) +void ocrdma_register_driver(void) +{ + ibv_register_driver("ocrdma", ocrdma_driver_init); +} + +static __attribute__ ((destructor)) +void ocrdma_unregister_driver(void) +{ + struct ocrdma_list_node *cur, *tmp; + struct ocrdma_device *dev; + list_lock(&ocrdma_dev_list); + list_for_each_node_safe(cur, tmp, &ocrdma_dev_list) { + dev = list_node(cur, struct ocrdma_device, entry); + pthread_mutex_destroy(&dev->dev_lock); + pthread_spin_destroy(&dev->flush_q_lock); + 
list_del_node(&dev->entry); + /* + * Avoid freeing the dev here since MPI get SIGSEGV + * in few error cases because of reference to ib_dev + * after free. + * TODO Bug 135437 fix it properly to avoid mem leak + */ + /* free(dev); */ + } + list_unlock(&ocrdma_dev_list); +} diff --git a/src/ocrdma_main.h b/src/ocrdma_main.h new file mode 100644 index 0000000..392c77a --- /dev/null +++ b/src/ocrdma_main.h @@ -0,0 +1,301 @@ +/* + * Copyright (C) 2008-2013 Emulex. All rights reserved. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __OCRDMA_MAIN_H__ +#define __OCRDMA_MAIN_H__ + +#include +#include +#include + +#include +#include + +#include "ocrdma_list.h" + +#define ocrdma_err(format, arg...) printf(format, ##arg) + +#define OCRDMA_DPP_PAGE_SIZE (4096) + +#define ROUND_UP_X(_val, _x) \ + (((unsigned long)(_val) + ((_x)-1)) & (long)~((_x)-1)) + +struct ocrdma_qp; + +struct ocrdma_device { + struct ibv_device ibv_dev; + struct ocrdma_qp **qp_tbl; + pthread_mutex_t dev_lock; + pthread_spinlock_t flush_q_lock; + struct ocrdma_list_node entry; + int id; + int gen; + uint32_t wqe_size; + uint32_t rqe_size; + uint32_t dpp_wqe_size; + uint32_t max_inline_data; + uint8_t fw_ver[32]; +}; + +struct ocrdma_devctx { + struct ibv_context ibv_ctx; + uint32_t *ah_tbl; + uint32_t ah_tbl_len; + pthread_mutex_t tbl_lock; +}; + +struct ocrdma_pd { + struct ibv_pd ibv_pd; + struct ocrdma_device *dev; + struct ocrdma_devctx *uctx; + void *dpp_va; +}; + +struct ocrdma_mr { + struct ibv_mr ibv_mr; +}; + +struct ocrdma_cq { + struct ibv_cq ibv_cq; + struct ocrdma_device *dev; + uint16_t cq_id; + uint16_t cq_dbid; + uint16_t getp; + pthread_spinlock_t cq_lock; + uint32_t max_hw_cqe; + uint32_t cq_mem_size; + struct ocrdma_cqe *va; + void *db_va; + + uint32_t db_size; + + uint32_t phase; + int phase_change; + + int armed; + int solicited; + int arm_needed; + struct ocrdma_list_head sq_head; + struct ocrdma_list_head rq_head; +}; + +enum { + OCRDMA_DPP_WQE_INDEX_MASK = 
0xFFFF, + OCRDMA_DPP_CQE_VALID_BIT_SHIFT = 31, + OCRDMA_DPP_CQE_VALID_BIT_MASK = 1 << 31 +}; + +struct ocrdma_dpp_cqe { + uint32_t wqe_idx_valid; +}; + +enum { + OCRDMA_PD_MAX_DPP_ENABLED_QP = 16 +}; + +struct ocrdma_qp_hwq_info { + uint8_t *va; /* virtual address */ + uint32_t max_sges; + uint32_t free_cnt; + + uint32_t head, tail; + uint32_t entry_size; + uint32_t max_cnt; + uint32_t max_wqe_idx; + uint32_t len; + uint16_t dbid; /* qid, where to ring the doorbell. */ +}; + +struct ocrdma_srq { + struct ibv_srq ibv_srq; + struct ocrdma_device *dev; + void *db_va; + uint32_t db_size; + pthread_spinlock_t q_lock; + + struct ocrdma_qp_hwq_info rq; + uint32_t max_rq_sges; + uint32_t id; + uint64_t *rqe_wr_id_tbl; + uint32_t *idx_bit_fields; + uint32_t bit_fields_len; + uint32_t db_shift; +}; + +enum { + OCRDMA_CREATE_QP_REQ_DPP_CREDIT_LIMIT = 1 +}; + +enum ocrdma_qp_state { + OCRDMA_QPS_RST = 0, + OCRDMA_QPS_INIT = 1, + OCRDMA_QPS_RTR = 2, + OCRDMA_QPS_RTS = 3, + OCRDMA_QPS_SQE = 4, + OCRDMA_QPS_SQ_DRAINING = 5, + OCRDMA_QPS_ERR = 6, + OCRDMA_QPS_SQD = 7 +}; + +struct ocrdma_qp { + struct ibv_qp ibv_qp; + struct ocrdma_device *dev; + pthread_spinlock_t q_lock; + + struct ocrdma_qp_hwq_info sq; + struct ocrdma_cq *sq_cq; + struct { + uint64_t wrid; + uint16_t dpp_wqe_idx; + uint16_t dpp_wqe; + uint8_t signaled; + uint8_t rsvd[3]; + } *wqe_wr_id_tbl; + struct ocrdma_qp_hwq_info dpp_q; + int dpp_enabled; + + struct ocrdma_qp_hwq_info rq; + struct ocrdma_cq *rq_cq; + uint64_t *rqe_wr_id_tbl; + void *db_va; + void *db_sq_va; + void *db_rq_va; + uint32_t max_inline_data; + + struct ocrdma_srq *srq; + struct ocrdma_cq *dpp_cq; + + uint32_t db_size; + uint32_t max_ord; + uint32_t max_ird; + uint32_t dpp_prev_indx; + + enum ibv_qp_type qp_type; + enum ocrdma_qp_state state; + struct ocrdma_list_node sq_entry; + struct ocrdma_list_node rq_entry; + uint16_t id; + uint16_t rsvd; + uint32_t db_shift; + int signaled; /* signaled QP */ +}; + +struct ocrdma_ah { + struct ibv_ah 
ibv_ah; + struct ocrdma_pd *pd; + uint16_t id; +}; + +#define get_ocrdma_xxx(xxx, type) \ + ((struct ocrdma_##type *) \ + ((void *) ib##xxx - offsetof(struct ocrdma_##type, ibv_##xxx))) + +static inline struct ocrdma_devctx *get_ocrdma_ctx(struct ibv_context *ibctx) +{ + return get_ocrdma_xxx(ctx, devctx); +} + +static inline struct ocrdma_device *get_ocrdma_dev(struct ibv_device *ibdev) +{ + return get_ocrdma_xxx(dev, device); +} + +static inline struct ocrdma_qp *get_ocrdma_qp(struct ibv_qp *ibqp) +{ + return get_ocrdma_xxx(qp, qp); +} + +static inline struct ocrdma_srq *get_ocrdma_srq(struct ibv_srq *ibsrq) +{ + return get_ocrdma_xxx(srq, srq); +} + +static inline struct ocrdma_pd *get_ocrdma_pd(struct ibv_pd *ibpd) +{ + return get_ocrdma_xxx(pd, pd); +} + +static inline struct ocrdma_cq *get_ocrdma_cq(struct ibv_cq *ibcq) +{ + return get_ocrdma_xxx(cq, cq); +} + +static inline struct ocrdma_ah *get_ocrdma_ah(struct ibv_ah *ibah) +{ + return get_ocrdma_xxx(ah, ah); +} + +struct ibv_device *ocrdma_driver_init(const char *, int); + +void ocrdma_init_ahid_tbl(struct ocrdma_devctx *ctx); +int ocrdma_query_device(struct ibv_context *, struct ibv_device_attr *); +int ocrdma_query_port(struct ibv_context *, uint8_t, struct ibv_port_attr *); +struct ibv_pd *ocrdma_alloc_pd(struct ibv_context *); +int ocrdma_free_pd(struct ibv_pd *); +struct ibv_mr *ocrdma_reg_mr(struct ibv_pd *, void *, size_t, + int ibv_access_flags); +int ocrdma_dereg_mr(struct ibv_mr *); + +struct ibv_cq *ocrdma_create_cq(struct ibv_context *, int, + struct ibv_comp_channel *, int); +int ocrdma_resize_cq(struct ibv_cq *, int); +int ocrdma_destroy_cq(struct ibv_cq *); +int ocrdma_poll_cq(struct ibv_cq *, int, struct ibv_wc *); +int ocrdma_arm_cq(struct ibv_cq *, int); +void ocrdma_cq_handler(struct ibv_cq *); + +struct ibv_qp *ocrdma_create_qp(struct ibv_pd *, struct ibv_qp_init_attr *); +int ocrdma_modify_qp(struct ibv_qp *, struct ibv_qp_attr *, + int ibv_qp_attr_mask); +int ocrdma_query_qp(struct 
ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask, + struct ibv_qp_init_attr *init_attr); +int ocrdma_destroy_qp(struct ibv_qp *); +int ocrdma_post_send(struct ibv_qp *, struct ibv_send_wr *, + struct ibv_send_wr **); +int ocrdma_post_recv(struct ibv_qp *, struct ibv_recv_wr *, + struct ibv_recv_wr **); + +struct ibv_srq *ocrdma_create_srq(struct ibv_pd *, struct ibv_srq_init_attr *); +int ocrdma_modify_srq(struct ibv_srq *, struct ibv_srq_attr *, int); +int ocrdma_destroy_srq(struct ibv_srq *); +int ocrdma_query_srq(struct ibv_srq *ibsrq, struct ibv_srq_attr *attr); +int ocrdma_post_srq_recv(struct ibv_srq *, struct ibv_recv_wr *, + struct ibv_recv_wr **); +struct ibv_ah *ocrdma_create_ah(struct ibv_pd *, struct ibv_ah_attr *); +int ocrdma_destroy_ah(struct ibv_ah *); +int ocrdma_attach_mcast(struct ibv_qp *, const union ibv_gid *, uint16_t); +int ocrdma_detach_mcast(struct ibv_qp *, const union ibv_gid *, uint16_t); +void ocrdma_async_event(struct ibv_async_event *event); + +#endif /* __OCRDMA_MAIN_H__ */ diff --git a/src/ocrdma_verbs.c b/src/ocrdma_verbs.c new file mode 100644 index 0000000..9796dff --- /dev/null +++ b/src/ocrdma_verbs.c @@ -0,0 +1,2198 @@ +/* + * Copyright (C) 2008-2013 Emulex. All rights reserved. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#if HAVE_CONFIG_H +#include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ocrdma_main.h" +#include "ocrdma_abi.h" +#include "ocrdma_list.h" + +static void ocrdma_ring_cq_db(struct ocrdma_cq *cq, uint32_t armed, + int solicited, uint32_t num_cqe); + +static inline uint32_t ocrdma_swap_endianness(uint32_t val) +{ + return ((val & 0xFF000000) >> 24) | ((val & 0xFF) << 24) | + ((val & 0xFF00) << 8) | ((val & 0xFF0000) >> 8); +} + +static inline uint32_t ocrdma_cpu_to_le(uint32_t val) +{ +#if __BYTE_ORDER == __BIG_ENDIAN + return ocrdma_swap_endianness(val); +#else + return val; +#endif +} + +static inline uint32_t ocrdma_le_to_cpu(uint32_t val) +{ +#if __BYTE_ORDER == __BIG_ENDIAN + return ocrdma_swap_endianness(val); +#else + return val; +#endif +} + +static inline uint32_t ocrdma_cpu_to_be(uint32_t val) +{ +#if __BYTE_ORDER == __LITTLE_ENDIAN + return ocrdma_swap_endianness(val); +#else + return val; +#endif +} + +static inline void ocrdma_swap_cpu_to_le(void *dst, uint32_t len) +{ +#if __BYTE_ORDER == __BIG_ENDIAN + int i = 0; + uint32_t *src_ptr = dst; + uint32_t *dst_ptr = dst; + for (; i < (len / 4); i++) + *dst_ptr++ = ocrdma_swap_endianness(*src_ptr++); +#endif +} + +/* + * ocrdma_query_device + */ +int ocrdma_query_device(struct ibv_context *context, + struct ibv_device_attr *attr) +{ + struct ibv_query_device cmd; + uint64_t fw_ver; + struct ocrdma_device *dev = get_ocrdma_dev(context->device); + int status; + + bzero(attr, sizeof *attr); + status = ibv_cmd_query_device(context, attr, &fw_ver, &cmd, sizeof cmd); + memcpy(attr->fw_ver, dev->fw_ver, sizeof(dev->fw_ver)); + return status; +} + +/* + * ocrdma_query_port + */ +int ocrdma_query_port(struct ibv_context *context, uint8_t port, + struct ibv_port_attr *attr) +{ + struct ibv_query_port cmd; + int status; + status = ibv_cmd_query_port(context, port, attr, &cmd, sizeof cmd); + 
return status; +} + +#define OCRDMA_INVALID_AH_IDX 0xffffffff +void ocrdma_init_ahid_tbl(struct ocrdma_devctx *ctx) +{ + int i; + + pthread_mutex_init(&ctx->tbl_lock, NULL); + for (i = 0; i < (ctx->ah_tbl_len / sizeof(uint32_t)); i++) + ctx->ah_tbl[i] = OCRDMA_INVALID_AH_IDX; +} + +static int ocrdma_alloc_ah_tbl_id(struct ocrdma_devctx *ctx) +{ + int i; + int status = -EINVAL; + pthread_mutex_lock(&ctx->tbl_lock); + + for (i = 0; i < (ctx->ah_tbl_len / sizeof(uint32_t)); i++) { + if (ctx->ah_tbl[i] == OCRDMA_INVALID_AH_IDX) { + ctx->ah_tbl[i] = ctx->ah_tbl_len; + status = i; + break; + } + } + pthread_mutex_unlock(&ctx->tbl_lock); + return status; +} + +static void ocrdma_free_ah_tbl_id(struct ocrdma_devctx *ctx, int idx) +{ + pthread_mutex_lock(&ctx->tbl_lock); + ctx->ah_tbl[idx] = OCRDMA_INVALID_AH_IDX; + pthread_mutex_unlock(&ctx->tbl_lock); +} + +/* + * ocrdma_alloc_pd + */ +struct ibv_pd *ocrdma_alloc_pd(struct ibv_context *context) +{ + struct ocrdma_alloc_pd_req cmd; + struct ocrdma_alloc_pd_resp resp; + struct ocrdma_pd *pd; + uint64_t map_address = 0; + + pd = malloc(sizeof *pd); + if (!pd) + return NULL; + bzero(pd, sizeof *pd); + memset(&cmd, 0, sizeof(cmd)); + + if (ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd.cmd, sizeof cmd, + &resp.ibv_resp, sizeof resp)) { + free(pd); + return NULL; + } + pd->dev = get_ocrdma_dev(context->device); + pd->uctx = get_ocrdma_ctx(context); + + if (resp.dpp_enabled) { + map_address = ((uint64_t) resp.dpp_page_addr_hi << 32) | + resp.dpp_page_addr_lo; + pd->dpp_va = mmap(NULL, OCRDMA_DPP_PAGE_SIZE, PROT_WRITE, + MAP_SHARED, context->cmd_fd, map_address); + if (pd->dpp_va == MAP_FAILED) { + ocrdma_free_pd(&pd->ibv_pd); + return NULL; + } + } + return &pd->ibv_pd; +} + +/* + * ocrdma_free_pd + */ +int ocrdma_free_pd(struct ibv_pd *ibpd) +{ + int status; + struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); + + status = ibv_cmd_dealloc_pd(ibpd); + if (pd->dpp_va) + munmap((void *)pd->dpp_va, OCRDMA_DPP_PAGE_SIZE); + if (status) + 
return status; + free(pd); + return 0; +} + +/* + * ocrdma_reg_mr + */ +struct ibv_mr *ocrdma_reg_mr(struct ibv_pd *pd, void *addr, + size_t len, int access) +{ + struct ocrdma_mr *mr; + struct ibv_reg_mr cmd; + struct ocrdma_reg_mr_resp resp; + uint64_t hca_va = (uintptr_t) addr; + + mr = malloc(sizeof *mr); + if (!mr) + return NULL; + bzero(mr, sizeof *mr); + + if (ibv_cmd_reg_mr(pd, addr, len, hca_va, + access, &mr->ibv_mr, &cmd, sizeof cmd, + &resp.ibv_resp, sizeof resp)) { + free(mr); + return NULL; + } + return &mr->ibv_mr; +} + +/* + * ocrdma_dereg_mr + */ +int ocrdma_dereg_mr(struct ibv_mr *mr) +{ + int status; + status = ibv_cmd_dereg_mr(mr); + if (status) + return status; + free(mr); + return 0; +} + +/* + * ocrdma_create_cq + */ +static struct ibv_cq *ocrdma_create_cq_common(struct ibv_context *context, + int cqe, + struct ibv_comp_channel *channel, + int comp_vector, int dpp_cq) +{ + int status; + struct ocrdma_create_cq_req cmd; + struct ocrdma_create_cq_resp resp; + struct ocrdma_cq *cq; + struct ocrdma_device *dev = get_ocrdma_dev(context->device); + void *map_addr; + + cq = malloc(sizeof *cq); + if (!cq) + return NULL; + + bzero(cq, sizeof *cq); + cmd.dpp_cq = dpp_cq; + status = ibv_cmd_create_cq(context, cqe, channel, comp_vector, + &cq->ibv_cq, &cmd.ibv_cmd, sizeof cmd, + &resp.ibv_resp, sizeof resp); + if (status) + goto cq_err1; + + pthread_spin_init(&cq->cq_lock, PTHREAD_PROCESS_PRIVATE); + cq->dev = dev; + cq->cq_id = resp.cq_id; + cq->cq_dbid = resp.cq_id; + cq->cq_mem_size = resp.size; + cq->max_hw_cqe = resp.max_hw_cqe; + cq->phase_change = resp.phase_change; + cq->va = mmap(NULL, resp.size, PROT_READ | PROT_WRITE, + MAP_SHARED, context->cmd_fd, resp.page_addr[0]); + if (cq->va == MAP_FAILED) + goto cq_err2; + + map_addr = mmap(NULL, resp.db_page_size, PROT_WRITE, + MAP_SHARED, context->cmd_fd, resp.db_page_addr); + if (map_addr == MAP_FAILED) + goto cq_err2; + cq->db_va = map_addr; + cq->db_size = resp.db_page_size; + cq->phase = 
OCRDMA_CQE_VALID; + if (!dpp_cq) { + cq->arm_needed = 1; + ocrdma_ring_cq_db(cq, 0, 0, 0); + } + cq->ibv_cq.cqe = cqe; + INIT_DBLY_LIST_HEAD(&cq->sq_head); + INIT_DBLY_LIST_HEAD(&cq->rq_head); + return &cq->ibv_cq; +cq_err2: + (void)ibv_cmd_destroy_cq(&cq->ibv_cq); +cq_err1: + free(cq); + return NULL; +} + +struct ibv_cq *ocrdma_create_cq(struct ibv_context *context, int cqe, + struct ibv_comp_channel *channel, + int comp_vector) +{ + return ocrdma_create_cq_common(context, cqe, channel, comp_vector, 0); +} + +#ifdef DPP_CQ_SUPPORT +static struct ocrdma_cq *ocrdma_create_dpp_cq(struct ibv_context *context, + int cqe) +{ + struct ibv_cq *ibcq; + ibcq = ocrdma_create_cq_common(context, cqe, 0, 0, 1); + if (ibcq) + return get_ocrdma_cq(ibcq); + return NULL; +} +#endif + +/* + * ocrdma_resize_cq + */ +int ocrdma_resize_cq(struct ibv_cq *ibcq, int new_entries) +{ + int status; + struct ibv_resize_cq cmd; + struct ibv_resize_cq_resp resp; + status = ibv_cmd_resize_cq(ibcq, new_entries, + &cmd, sizeof cmd, &resp, sizeof resp); + if (status == 0) + ibcq->cqe = new_entries; + return status; +} + +/* + * ocrdma_destroy_cq + */ +int ocrdma_destroy_cq(struct ibv_cq *ibv_cq) +{ + struct ocrdma_cq *cq = get_ocrdma_cq(ibv_cq); + ibv_cmd_destroy_cq(ibv_cq); + if (cq->db_va) + munmap((void *)cq->db_va, cq->db_size); + free(cq); + return 0; +} + +static void ocrdma_add_qpn_map(struct ocrdma_device *dev, struct ocrdma_qp *qp) +{ + pthread_mutex_lock(&dev->dev_lock); + dev->qp_tbl[qp->id] = qp; + pthread_mutex_unlock(&dev->dev_lock); +} + +static void _ocrdma_del_qpn_map(struct ocrdma_device *dev, struct ocrdma_qp *qp) +{ + dev->qp_tbl[qp->id] = NULL; +} + +struct ibv_srq *ocrdma_create_srq(struct ibv_pd *pd, + struct ibv_srq_init_attr *init_attr) +{ + int status = 0; + struct ocrdma_srq *srq; + struct ocrdma_create_srq_cmd cmd; + struct ocrdma_create_srq_resp resp; + void *map_addr; + + srq = calloc(1, sizeof *srq); + if (!srq) + return NULL; + + pthread_spin_init(&srq->q_lock, 
PTHREAD_PROCESS_PRIVATE); + status = ibv_cmd_create_srq(pd, &srq->ibv_srq, init_attr, &cmd.ibv_cmd, + sizeof cmd, &resp.ibv_resp, sizeof resp); + if (status) + goto cmd_err; + + srq->dev = get_ocrdma_pd(pd)->dev; + srq->rq.dbid = resp.rq_dbid; + srq->rq.max_sges = init_attr->attr.max_sge; + srq->rq.max_cnt = resp.num_rqe_allocated; + srq->rq.max_wqe_idx = resp.num_rqe_allocated - 1; + srq->rq.entry_size = srq->dev->rqe_size; + srq->rqe_wr_id_tbl = calloc(srq->rq.max_cnt, sizeof(uint64_t)); + if (srq->rqe_wr_id_tbl == NULL) + goto map_err; + + srq->bit_fields_len = + (srq->rq.max_cnt / 32) + (srq->rq.max_cnt % 32 ? 1 : 0); + srq->idx_bit_fields = malloc(srq->bit_fields_len * sizeof(uint32_t)); + if (srq->idx_bit_fields == NULL) + goto map_err; + memset(srq->idx_bit_fields, 0xff, + srq->bit_fields_len * sizeof(uint32_t)); + + if (resp.num_rq_pages > 1) + goto map_err; + + map_addr = mmap(NULL, resp.rq_page_size, PROT_READ | PROT_WRITE, + MAP_SHARED, pd->context->cmd_fd, resp.rq_page_addr[0]); + if (map_addr == MAP_FAILED) + goto map_err; + srq->rq.len = resp.rq_page_size; + srq->rq.va = map_addr; + + map_addr = mmap(NULL, resp.db_page_size, PROT_WRITE, + MAP_SHARED, pd->context->cmd_fd, resp.db_page_addr); + if (map_addr == MAP_FAILED) + goto map_err; + srq->db_va = (uint8_t *) map_addr + resp.db_rq_offset; + srq->db_shift = resp.db_shift; + srq->db_size = resp.db_page_size; + return &srq->ibv_srq; + +map_err: + ocrdma_destroy_srq(&srq->ibv_srq); + return NULL; + +cmd_err: + pthread_spin_destroy(&srq->q_lock); + free(srq); + return NULL; +} + +int ocrdma_modify_srq(struct ibv_srq *ibsrq, + struct ibv_srq_attr *attr, int attr_mask) +{ + int status; + struct ocrdma_device *dev; + struct ocrdma_srq *srq; + struct ibv_modify_srq cmd; + + srq = get_ocrdma_srq(ibsrq); + dev = srq->dev; + + status = ibv_cmd_modify_srq(ibsrq, attr, attr_mask, &cmd, sizeof cmd); + return status; +} + +int ocrdma_query_srq(struct ibv_srq *ibsrq, struct ibv_srq_attr *attr) +{ + int status; + 
struct ibv_query_srq cmd; + struct ocrdma_device *dev; + struct ocrdma_srq *srq; + + srq = get_ocrdma_srq(ibsrq); + dev = srq->dev; + status = ibv_cmd_query_srq(ibsrq, attr, &cmd, sizeof cmd); + return status; +} + +int ocrdma_destroy_srq(struct ibv_srq *ibsrq) +{ + int status; + int id; + struct ocrdma_srq *srq; + struct ocrdma_device *dev; + srq = get_ocrdma_srq(ibsrq); + dev = srq->dev; + + id = dev->id; + status = ibv_cmd_destroy_srq(ibsrq); + if (srq->idx_bit_fields) + free(srq->idx_bit_fields); + if (srq->rqe_wr_id_tbl) + free(srq->rqe_wr_id_tbl); + if (srq->db_va) { + munmap((void *)srq->db_va, srq->db_size); + srq->db_va = 0; + } + if (srq->rq.va) { + munmap(srq->rq.va, srq->rq.len); + srq->rq.va = NULL; + } + pthread_spin_destroy(&srq->q_lock); + free(srq); + return status; +} + +/* + * ocrdma_create_qp + */ +struct ibv_qp *ocrdma_create_qp(struct ibv_pd *pd, + struct ibv_qp_init_attr *attrs) +{ + int status = 0; + struct ocrdma_create_qp_cmd cmd; + struct ocrdma_create_qp_uresp resp; + struct ocrdma_qp *qp; + void *map_addr; +#ifdef DPP_CQ_SUPPORT + struct ocrdma_dpp_cqe *dpp_cqe = NULL; +#endif + + qp = calloc(1, sizeof *qp); + if (!qp) + return NULL; + memset(&cmd, 0, sizeof(cmd)); + + qp->qp_type = attrs->qp_type; + pthread_spin_init(&qp->q_lock, PTHREAD_PROCESS_PRIVATE); + +#ifdef DPP_CQ_SUPPORT + if (attrs->cap.max_inline_data) { + qp->dpp_cq = ocrdma_create_dpp_cq(pd->context, + OCRDMA_CREATE_QP_REQ_DPP_CREDIT_LIMIT); + if (qp->dpp_cq) { + cmd.enable_dpp_cq = 1; + cmd.dpp_cq_id = qp->dpp_cq->cq_id; + /* Write invalid index for the first entry */ + dpp_cqe = (struct ocrdma_dpp_cqe *)qp->dpp_cq->va; + dpp_cqe->wqe_idx_valid = 0xFFFF; + qp->dpp_prev_indx = 0xFFFF; + } + } +#endif + status = ibv_cmd_create_qp(pd, &qp->ibv_qp, attrs, &cmd.ibv_cmd, + sizeof cmd, &resp.ibv_resp, sizeof resp); + if (status) + goto mbx_err; + + qp->dev = get_ocrdma_dev(pd->context->device); + qp->id = resp.qp_id; + + ocrdma_add_qpn_map(qp->dev, qp); + + qp->sq.dbid = 
resp.sq_dbid; + + qp->sq.max_sges = attrs->cap.max_send_sge; + qp->max_inline_data = attrs->cap.max_inline_data; + + qp->signaled = attrs->sq_sig_all; + + qp->sq.max_cnt = resp.num_wqe_allocated; + qp->sq.max_wqe_idx = resp.num_wqe_allocated - 1; + qp->sq.entry_size = qp->dev->wqe_size; + if (attrs->srq) + qp->srq = get_ocrdma_srq(attrs->srq); + else { + qp->rq.dbid = resp.rq_dbid; + qp->rq.max_sges = attrs->cap.max_recv_sge; + qp->rq.max_cnt = resp.num_rqe_allocated; + qp->rq.max_wqe_idx = resp.num_rqe_allocated - 1; + qp->rq.entry_size = qp->dev->rqe_size; + qp->rqe_wr_id_tbl = calloc(qp->rq.max_cnt, sizeof(uint64_t)); + if (qp->rqe_wr_id_tbl == NULL) + goto map_err; + } + + qp->sq_cq = get_ocrdma_cq(attrs->send_cq); + qp->rq_cq = get_ocrdma_cq(attrs->recv_cq); + + qp->wqe_wr_id_tbl = calloc(qp->sq.max_cnt, sizeof(*qp->wqe_wr_id_tbl)); + if (qp->wqe_wr_id_tbl == NULL) + goto map_err; + + /* currently we support only one virtual page */ + if ((resp.num_sq_pages > 1) || (!attrs->srq && resp.num_rq_pages > 1)) + goto map_err; + + map_addr = mmap(NULL, resp.sq_page_size, PROT_READ | PROT_WRITE, + MAP_SHARED, pd->context->cmd_fd, resp.sq_page_addr[0]); + if (map_addr == MAP_FAILED) + goto map_err; + qp->sq.va = map_addr; + qp->sq.len = resp.sq_page_size; + qp->db_shift = resp.db_shift; + + if (!attrs->srq) { + map_addr = mmap(NULL, resp.rq_page_size, PROT_READ | PROT_WRITE, + MAP_SHARED, pd->context->cmd_fd, + resp.rq_page_addr[0]); + if (map_addr == MAP_FAILED) + goto map_err; + + qp->rq.len = resp.rq_page_size; + qp->rq.va = map_addr; + } + + map_addr = mmap(NULL, resp.db_page_size, PROT_WRITE, + MAP_SHARED, pd->context->cmd_fd, resp.db_page_addr); + if (map_addr == MAP_FAILED) + goto map_err; + + qp->db_va = map_addr; + qp->db_sq_va = (uint8_t *) map_addr + resp.db_sq_offset; + qp->db_rq_va = (uint8_t *) map_addr + resp.db_rq_offset; + + qp->db_size = resp.db_page_size; + + if (resp.dpp_credit) { + struct ocrdma_pd *opd = get_ocrdma_pd(pd); + map_addr = (uint8_t *) 
opd->dpp_va + + (resp.dpp_offset * qp->dev->wqe_size); + qp->dpp_q.max_cnt = 1; /* DPP is posted at the same offset */ + qp->dpp_q.free_cnt = resp.dpp_credit; + qp->dpp_q.va = map_addr; + qp->dpp_q.head = qp->dpp_q.tail = 0; + qp->dpp_q.entry_size = qp->dev->dpp_wqe_size; + qp->dpp_q.len = resp.dpp_credit * qp->dev->dpp_wqe_size; + qp->dpp_enabled = 1; + } else { + if (qp->dpp_cq) { + ocrdma_destroy_cq(&qp->dpp_cq->ibv_cq); + qp->dpp_cq = 0; + } + } + qp->state = IBV_QPS_RESET; + INIT_DBLY_LIST_NODE(&qp->sq_entry); + INIT_DBLY_LIST_NODE(&qp->rq_entry); + return &qp->ibv_qp; + +map_err: + ocrdma_destroy_qp(&qp->ibv_qp); + return NULL; +mbx_err: + pthread_spin_destroy(&qp->q_lock); + free(qp); + return NULL; +} + +/* + * ocrdma_query_qp + */ +int ocrdma_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + int attr_mask, struct ibv_qp_init_attr *init_attr) +{ + struct ibv_query_qp cmd; + int status; + + status = + ibv_cmd_query_qp(qp, attr, attr_mask, init_attr, &cmd, sizeof(cmd)); + return status; +} + +enum ocrdma_qp_state get_ocrdma_qp_state(enum ibv_qp_state qps) +{ + switch (qps) { + case IBV_QPS_RESET: + return OCRDMA_QPS_RST; + case IBV_QPS_INIT: + return OCRDMA_QPS_INIT; + case IBV_QPS_RTR: + return OCRDMA_QPS_RTR; + case IBV_QPS_RTS: + return OCRDMA_QPS_RTS; + case IBV_QPS_SQD: + return OCRDMA_QPS_SQD; + case IBV_QPS_SQE: + return OCRDMA_QPS_SQE; + case IBV_QPS_ERR: + return OCRDMA_QPS_ERR; + }; + return OCRDMA_QPS_ERR; +} + +static int ocrdma_is_qp_in_sq_flushlist(struct ocrdma_cq *cq, + struct ocrdma_qp *qp) +{ + struct ocrdma_qp *list_qp; + struct ocrdma_list_node *cur, *tmp; + int found = 0; + list_for_each_node_safe(cur, tmp, &cq->sq_head) { + list_qp = list_node(cur, struct ocrdma_qp, sq_entry); + if (qp == list_qp) { + found = 1; + break; + } + } + return found; +} + +static int ocrdma_is_qp_in_rq_flushlist(struct ocrdma_cq *cq, + struct ocrdma_qp *qp) +{ + struct ocrdma_qp *list_qp; + struct ocrdma_list_node *cur, *tmp; + int found = 0; + 
list_for_each_node_safe(cur, tmp, &cq->rq_head) { + list_qp = list_node(cur, struct ocrdma_qp, rq_entry); + if (qp == list_qp) { + found = 1; + break; + } + } + return found; +} + +static void ocrdma_init_hwq_ptr(struct ocrdma_qp *qp) +{ + qp->sq.head = qp->sq.tail = 0; + qp->rq.head = qp->rq.tail = 0; + qp->dpp_q.head = qp->dpp_q.tail = 0; + qp->dpp_q.free_cnt = qp->dpp_q.max_cnt; +} + +static void ocrdma_del_flush_qp(struct ocrdma_qp *qp) +{ + int found = 0; + struct ocrdma_device *dev = qp->dev; + /* sync with any active CQ poll */ + + pthread_spin_lock(&dev->flush_q_lock); + found = ocrdma_is_qp_in_sq_flushlist(qp->sq_cq, qp); + if (found) + list_del_node(&qp->sq_entry); + if (!qp->srq) { + found = ocrdma_is_qp_in_rq_flushlist(qp->rq_cq, qp); + if (found) + list_del_node(&qp->rq_entry); + } + pthread_spin_unlock(&dev->flush_q_lock); +} + +static void ocrdma_flush_qp(struct ocrdma_qp *qp) +{ + int found; + + pthread_spin_lock(&qp->dev->flush_q_lock); + found = ocrdma_is_qp_in_sq_flushlist(qp->sq_cq, qp); + if (!found) + list_add_node_tail(&qp->sq_entry, &qp->sq_cq->sq_head); + if (!qp->srq) { + found = ocrdma_is_qp_in_rq_flushlist(qp->rq_cq, qp); + if (!found) + list_add_node_tail(&qp->rq_entry, &qp->rq_cq->rq_head); + } + pthread_spin_unlock(&qp->dev->flush_q_lock); +} + +static int ocrdma_qp_state_machine(struct ocrdma_qp *qp, + enum ibv_qp_state new_ib_state) +{ + int status = 0; + enum ocrdma_qp_state new_state; + new_state = get_ocrdma_qp_state(new_ib_state); + + pthread_spin_lock(&qp->q_lock); + + if (new_state == qp->state) { + pthread_spin_unlock(&qp->q_lock); + return 1; + } + + switch (qp->state) { + case OCRDMA_QPS_RST: + switch (new_state) { + case OCRDMA_QPS_RST: + break; + case OCRDMA_QPS_INIT: + /* init pointers to place wqe/rqe at start of hw q */ + ocrdma_init_hwq_ptr(qp); + /* detach qp from the CQ flush list */ + ocrdma_del_flush_qp(qp); + break; + default: + status = -EINVAL; + break; + }; + break; + case OCRDMA_QPS_INIT: + /* qps: INIT->XXX 
*/ + switch (new_state) { + case OCRDMA_QPS_INIT: + break; + case OCRDMA_QPS_RTR: + break; + case OCRDMA_QPS_ERR: + ocrdma_flush_qp(qp); + break; + default: + /* invalid state change. */ + status = -EINVAL; + break; + }; + break; + case OCRDMA_QPS_RTR: + /* qps: RTS->XXX */ + switch (new_state) { + case OCRDMA_QPS_RTS: + break; + case OCRDMA_QPS_ERR: + ocrdma_flush_qp(qp); + break; + default: + /* invalid state change. */ + status = -EINVAL; + break; + }; + break; + case OCRDMA_QPS_RTS: + /* qps: RTS->XXX */ + switch (new_state) { + case OCRDMA_QPS_SQD: + case OCRDMA_QPS_SQE: + break; + case OCRDMA_QPS_ERR: + ocrdma_flush_qp(qp); + break; + default: + /* invalid state change. */ + status = -EINVAL; + break; + }; + break; + case OCRDMA_QPS_SQD: + /* qps: SQD->XXX */ + switch (new_state) { + case OCRDMA_QPS_RTS: + case OCRDMA_QPS_SQE: + case OCRDMA_QPS_ERR: + break; + default: + /* invalid state change. */ + status = -EINVAL; + break; + }; + break; + case OCRDMA_QPS_SQE: + switch (new_state) { + case OCRDMA_QPS_RTS: + case OCRDMA_QPS_ERR: + break; + default: + /* invalid state change. 
*/ + status = -EINVAL; + break; + }; + break; + case OCRDMA_QPS_ERR: + /* qps: ERR->XXX */ + switch (new_state) { + case OCRDMA_QPS_RST: + break; + default: + status = -EINVAL; + break; + }; + break; + default: + status = -EINVAL; + break; + }; + if (!status) + qp->state = new_state; + + pthread_spin_unlock(&qp->q_lock); + return status; +} + +/* + * ocrdma_modify_qp + */ +int ocrdma_modify_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr, + int attr_mask) +{ + struct ibv_modify_qp cmd; + struct ocrdma_qp *qp = get_ocrdma_qp(ibqp); + int status; + status = ibv_cmd_modify_qp(ibqp, attr, attr_mask, &cmd, sizeof cmd); + if ((!status) && (attr_mask & IBV_QP_STATE)) + ocrdma_qp_state_machine(qp, attr->qp_state); + return status; +} + +static void ocrdma_srq_toggle_bit(struct ocrdma_srq *srq, int idx) +{ + int i = idx / 32; + unsigned int mask = (1 << (idx % 32)); + if (srq->idx_bit_fields[i] & mask) + srq->idx_bit_fields[i] &= ~mask; + else + srq->idx_bit_fields[i] |= mask; + +} + +static int ocrdma_srq_get_idx(struct ocrdma_srq *srq) +{ + int row = 0; + int indx = 0; + for (row = 0; row < srq->bit_fields_len; row++) { + if (srq->idx_bit_fields[row]) { + indx = ffs(srq->idx_bit_fields[row]); + indx = (row * 32) + (indx - 1); + if (indx >= srq->rq.max_cnt) + assert(0); + ocrdma_srq_toggle_bit(srq, indx); + break; + } + } + if (row == srq->bit_fields_len) + assert(0); + return indx; +} + +static int ocrdma_dppq_credits(struct ocrdma_qp_hwq_info *q) +{ + return ((q->max_wqe_idx - q->head) + q->tail) % q->free_cnt; +} + +static int ocrdma_hwq_free_cnt(struct ocrdma_qp_hwq_info *q) +{ + return ((q->max_wqe_idx - q->head) + q->tail) % q->max_cnt; +} + +static int is_hw_sq_empty(struct ocrdma_qp *qp) +{ + return ((qp->sq.tail == qp->sq.head) ? 1 : 0); +} + +static inline int is_hw_rq_empty(struct ocrdma_qp *qp) +{ + return ((qp->rq.head == qp->rq.tail) ? 
1 : 0); +} + +static inline void *ocrdma_hwq_head(struct ocrdma_qp_hwq_info *q) +{ + return q->va + (q->head * q->entry_size); +} + +static inline void *ocrdma_wq_tail(struct ocrdma_qp_hwq_info *q) +{ + return q->va + (q->tail * q->entry_size); +} + +static inline void *ocrdma_hwq_head_from_idx(struct ocrdma_qp_hwq_info *q, + uint32_t idx) +{ + return q->va + (idx * q->entry_size); +} + +static void ocrdma_hwq_inc_head(struct ocrdma_qp_hwq_info *q) +{ + q->head = (q->head + 1) & q->max_wqe_idx; +} + +static void ocrdma_hwq_inc_tail(struct ocrdma_qp_hwq_info *q) +{ + q->tail = (q->tail + 1) & q->max_wqe_idx; +} + +static inline void ocrdma_hwq_inc_tail_by_idx(struct ocrdma_qp_hwq_info *q, + int idx) +{ + q->tail = (idx + 1) & q->max_wqe_idx; +} + +static int is_cqe_valid(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe) +{ + int cqe_valid; + cqe_valid = + ocrdma_le_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_VALID; + return (cqe_valid == cq->phase); +} + +static int is_cqe_for_sq(struct ocrdma_cqe *cqe) +{ + return (ocrdma_le_to_cpu(cqe->flags_status_srcqpn) & + OCRDMA_CQE_QTYPE) ? 0 : 1; +} + +static int is_cqe_imm(struct ocrdma_cqe *cqe) +{ + return (ocrdma_le_to_cpu(cqe->flags_status_srcqpn) & + OCRDMA_CQE_IMM) ? 1 : 0; +} + +static int is_cqe_wr_imm(struct ocrdma_cqe *cqe) +{ + return (ocrdma_le_to_cpu(cqe->flags_status_srcqpn) & + OCRDMA_CQE_WRITE_IMM) ? 
1 : 0; +} + +static inline void ocrdma_srq_inc_tail(struct ocrdma_qp *qp, + struct ocrdma_cqe *cqe) +{ + int wqe_idx; + wqe_idx = (ocrdma_le_to_cpu(cqe->rq.buftag_qpn) >> + OCRDMA_CQE_BUFTAG_SHIFT) & qp->srq->rq.max_wqe_idx; + pthread_spin_lock(&qp->srq->q_lock); + ocrdma_hwq_inc_tail(&qp->srq->rq); + ocrdma_srq_toggle_bit(qp->srq, wqe_idx); + pthread_spin_unlock(&qp->srq->q_lock); +} + +static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq) +{ + int discard_cnt = 0; + uint32_t cur_getp, stop_getp; + struct ocrdma_cqe *cqe; + uint32_t qpn = 0; + int wqe_idx; + + pthread_spin_lock(&cq->cq_lock); + + /* traverse through the CQEs in the hw CQ, + * find the matching CQE for a given qp, + * mark the matching one discarded=1. + * discard the cqe. + * ring the doorbell in the poll_cq() as + * we don't complete out of order cqe. + */ + cur_getp = cq->getp; + /* find upto when do we reap the cq. */ + stop_getp = cur_getp; + do { + if (is_hw_sq_empty(qp) && (!qp->srq && is_hw_rq_empty(qp))) + break; + + cqe = cq->va + cur_getp; + /* if (a) no valid cqe, or (b) done reading full hw cq, or + * (c) qp_xq becomes empty. + * then exit + */ + qpn = ocrdma_le_to_cpu(cqe->cmn.qpn) & OCRDMA_CQE_QPN_MASK; + /* if previously discarded cqe found, skip that too. */ + /* check for matching qp */ + if ((qpn == 0) || (qpn != qp->id)) + goto skip_cqe; + + /* mark cqe discarded so that it is not picked up later + * in the poll_cq(). 
+ */ + discard_cnt += 1; + /* discard by marking qp_id = 0 */ + cqe->cmn.qpn = 0; + if (is_cqe_for_sq(cqe)) { + wqe_idx = (ocrdma_le_to_cpu(cqe->wq.wqeidx) & + OCRDMA_CQE_WQEIDX_MASK) & qp->sq.max_wqe_idx; + ocrdma_hwq_inc_tail_by_idx(&qp->sq, wqe_idx); + } else { + if (qp->srq) + ocrdma_srq_inc_tail(qp, cqe); + else + ocrdma_hwq_inc_tail(&qp->rq); + } +skip_cqe: + cur_getp = (cur_getp + 1) % cq->max_hw_cqe; + + } while (cur_getp != stop_getp); + pthread_spin_unlock(&cq->cq_lock); +} + +/* + * ocrdma_destroy_qp + */ +int ocrdma_destroy_qp(struct ibv_qp *ibqp) +{ + int status = 0; + struct ocrdma_qp *qp; + struct ocrdma_device *dev; + int id; + qp = get_ocrdma_qp(ibqp); + dev = qp->dev; + id = dev->id; + /* + * acquire CQ lock while destroy is in progress, in order to + * protect against proessing in-flight CQEs for this QP. + */ + pthread_spin_lock(&qp->sq_cq->cq_lock); + + if (qp->rq_cq && (qp->rq_cq != qp->sq_cq)) + pthread_spin_lock(&qp->rq_cq->cq_lock); + + _ocrdma_del_qpn_map(qp->dev, qp); + + if (qp->rq_cq && (qp->rq_cq != qp->sq_cq)) + pthread_spin_unlock(&qp->rq_cq->cq_lock); + + pthread_spin_unlock(&qp->sq_cq->cq_lock); + + if (qp->db_va) + munmap((void *)qp->db_va, qp->db_size); + if (qp->rq.va) + munmap(qp->rq.va, qp->rq.len); + if (qp->sq.va) + munmap(qp->sq.va, qp->sq.len); + + /* ensure that CQEs for newly created QP (whose id may be same with + * one which just getting destroyed are same), dont get + * discarded until the old CQEs are discarded. 
+ */ + pthread_mutex_lock(&dev->dev_lock); + status = ibv_cmd_destroy_qp(ibqp); + + ocrdma_discard_cqes(qp, qp->sq_cq); + ocrdma_discard_cqes(qp, qp->rq_cq); + pthread_mutex_unlock(&dev->dev_lock); + + ocrdma_del_flush_qp(qp); + + pthread_spin_destroy(&qp->q_lock); + if (qp->rqe_wr_id_tbl) + free(qp->rqe_wr_id_tbl); + if (qp->wqe_wr_id_tbl) + free(qp->wqe_wr_id_tbl); + if (qp->dpp_cq) + ocrdma_destroy_cq(&qp->dpp_cq->ibv_cq); + free(qp); + + return status; +} + +static void ocrdma_ring_sq_db(struct ocrdma_qp *qp) +{ + uint32_t db_val = ocrdma_cpu_to_le((qp->sq.dbid | (1 << 16))); + *(uint32_t *) (((uint8_t *) qp->db_sq_va)) = db_val; +} + +static void ocrdma_ring_rq_db(struct ocrdma_qp *qp) +{ + uint32_t db_val = ocrdma_cpu_to_le((qp->rq.dbid | (1 << qp->db_shift))); + *(uint32_t *) ((uint8_t *) qp->db_rq_va) = db_val; +} + +static void ocrdma_ring_srq_db(struct ocrdma_srq *srq) +{ + uint32_t db_val = ocrdma_cpu_to_le(srq->rq.dbid | (1 << srq->db_shift)); + *(uint32_t *) (srq->db_va) = db_val; +} + +static void ocrdma_ring_cq_db(struct ocrdma_cq *cq, uint32_t armed, + int solicited, uint32_t num_cqe) +{ + uint32_t val; + + val = cq->cq_dbid & OCRDMA_DB_CQ_RING_ID_MASK; + val |= ((cq->cq_dbid & OCRDMA_DB_CQ_RING_ID_EXT_MASK) << + OCRDMA_DB_CQ_RING_ID_EXT_MASK_SHIFT); + + if (armed) + val |= (1 << OCRDMA_DB_CQ_REARM_SHIFT); + if (solicited) + val |= (1 << OCRDMA_DB_CQ_SOLICIT_SHIFT); + val |= (num_cqe << OCRDMA_DB_CQ_NUM_POPPED_SHIFT); + + *(uint32_t *) ((uint8_t *) (cq->db_va) + OCRDMA_DB_CQ_OFFSET) = + ocrdma_cpu_to_le(val); +} + +static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp, + struct ocrdma_hdr_wqe *hdr, + struct ibv_send_wr *wr) +{ + struct ocrdma_ewqe_ud_hdr *ud_hdr = + (struct ocrdma_ewqe_ud_hdr *)(hdr + 1); + struct ocrdma_ah *ah = get_ocrdma_ah(wr->wr.ud.ah); + + ud_hdr->rsvd_dest_qpn = wr->wr.ud.remote_qpn; + ud_hdr->qkey = wr->wr.ud.remote_qkey; + ud_hdr->rsvd_ahid = ah->id; +} + +static void ocrdma_build_sges(struct ocrdma_hdr_wqe *hdr, + struct 
ocrdma_sge *sge, int num_sge, + struct ibv_sge *sg_list) +{ + int i; + for (i = 0; i < num_sge; i++) { + sge[i].lrkey = sg_list[i].lkey; + sge[i].addr_lo = sg_list[i].addr; + sge[i].addr_hi = sg_list[i].addr >> 32; + sge[i].len = sg_list[i].length; + hdr->total_len += sg_list[i].length; + } + if (num_sge == 0) + memset(sge, 0, sizeof(*sge)); +} + + +static inline uint32_t ocrdma_sglist_len(struct ibv_sge *sg_list, int num_sge) +{ + uint32_t total_len = 0, i; + + for (i = 0; i < num_sge; i++) + total_len += sg_list[i].length; + return total_len; +} + +static inline int ocrdma_build_inline_sges(struct ocrdma_qp *qp, + struct ocrdma_hdr_wqe *hdr, + struct ocrdma_sge *sge, + struct ibv_send_wr *wr, + uint32_t wqe_size) +{ + int i; + + if (wr->send_flags & IBV_SEND_INLINE && qp->qp_type != IBV_QPT_UD) { + + hdr->total_len = ocrdma_sglist_len(wr->sg_list, wr->num_sge); + if (hdr->total_len > qp->max_inline_data) { + ocrdma_err + ("%s() supported_len=0x%x, unspported len req=0x%x\n", + __func__, qp->max_inline_data, hdr->total_len); + return -EINVAL; + } + + for (i = 0; i < wr->num_sge; i++) { + memcpy(sge, + (void *)(unsigned long)wr->sg_list[i].addr, + wr->sg_list[i].length); + sge += wr->sg_list[i].length; + } + + wqe_size += ROUND_UP_X(hdr->total_len, OCRDMA_WQE_ALIGN_BYTES); + if (0 == hdr->total_len) + wqe_size += sizeof(struct ocrdma_sge); + hdr->cw |= (OCRDMA_TYPE_INLINE << OCRDMA_WQE_TYPE_SHIFT); + } else { + ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list); + if (wr->num_sge) + wqe_size += (wr->num_sge * sizeof(struct ocrdma_sge)); + else + wqe_size += sizeof(struct ocrdma_sge); + hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT); + } + hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT); + return 0; +} + +static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, + struct ibv_send_wr *wr) +{ + int status; + struct ocrdma_sge *sge; + uint32_t wqe_size = sizeof(*hdr); + + if (qp->qp_type == IBV_QPT_UD) { + wqe_size 
+= sizeof(struct ocrdma_ewqe_ud_hdr); + ocrdma_build_ud_hdr(qp, hdr, wr); + sge = (struct ocrdma_sge *)(hdr + 2); + } else + sge = (struct ocrdma_sge *)(hdr + 1); + + status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size); + + return status; +} + +static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, + struct ibv_send_wr *wr) +{ + int status; + struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1); + struct ocrdma_sge *sge = ext_rw + 1; + uint32_t wqe_size = sizeof(*hdr) + sizeof(*ext_rw); + + status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size); + if (status) + return status; + + ext_rw->addr_lo = wr->wr.rdma.remote_addr; + ext_rw->addr_hi = (wr->wr.rdma.remote_addr >> 32); + ext_rw->lrkey = wr->wr.rdma.rkey; + ext_rw->len = hdr->total_len; + + return 0; +} + +static void ocrdma_build_read(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, + struct ibv_send_wr *wr) +{ + struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1); + struct ocrdma_sge *sge = ext_rw + 1; + uint32_t wqe_size = ((wr->num_sge + 1) * sizeof(*sge)) + sizeof(*hdr); + + hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT); + hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT); + hdr->cw |= (OCRDMA_READ << OCRDMA_WQE_OPCODE_SHIFT); + + ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list); + + ext_rw->addr_lo = wr->wr.rdma.remote_addr; + ext_rw->addr_hi = (wr->wr.rdma.remote_addr >> 32); + ext_rw->lrkey = wr->wr.rdma.rkey; + ext_rw->len = hdr->total_len; + +} + +/* Dpp cq is single entry cq, we just need to read + * wqe index from first 16 bits at 0th cqe index. 
+ */ +static void ocrdma_poll_dpp_cq(struct ocrdma_qp *qp) +{ + struct ocrdma_cq *cq = qp->dpp_cq; + struct ocrdma_dpp_cqe *cqe; + int idx = 0; + cqe = ((struct ocrdma_dpp_cqe *)cq->va); + idx = cqe->wqe_idx_valid & OCRDMA_DPP_WQE_INDEX_MASK; + + if (idx != qp->dpp_prev_indx) { + ocrdma_hwq_inc_tail_by_idx(&qp->dpp_q, idx); + qp->dpp_prev_indx = idx; + } +} + +static uint32_t ocrdma_get_hdr_len(struct ocrdma_qp *qp, + struct ocrdma_hdr_wqe *hdr) +{ + uint32_t hdr_sz = sizeof(*hdr); + if (qp->qp_type == IBV_QPT_UD) + hdr_sz += sizeof(struct ocrdma_ewqe_ud_hdr); + if (hdr->cw & (OCRDMA_WRITE << OCRDMA_WQE_OPCODE_SHIFT)) + hdr_sz += sizeof(struct ocrdma_sge); + return hdr_sz / sizeof(uint32_t); +} + +static void ocrdma_build_dpp_wqe(void *va, struct ocrdma_hdr_wqe *wqe, + uint32_t hdr_len) +{ + uint32_t pyld_len = (wqe->cw >> OCRDMA_WQE_SIZE_SHIFT) * 2; + uint32_t i = 0; + /* convert WQE header to LE format */ + for (; i < hdr_len; i++) + *((uint32_t *) va + i) = + ocrdma_cpu_to_le(*((uint32_t *) wqe + i)); + /* Convertion of data is done in HW */ + for (; i < pyld_len; i++) + *((uint32_t *) va + i) = (*((uint32_t *) wqe + i)); + wc_wmb(); +} + +static void ocrdma_post_dpp_wqe(struct ocrdma_qp *qp, + struct ocrdma_hdr_wqe *hdr) +{ + if (qp->dpp_cq && ocrdma_dppq_credits(&qp->dpp_q) == 0) + ocrdma_poll_dpp_cq(qp); + if (!qp->dpp_cq || ocrdma_dppq_credits(&qp->dpp_q)) { + ocrdma_build_dpp_wqe(qp->dpp_q.va, hdr, + ocrdma_get_hdr_len(qp, hdr)); + qp->wqe_wr_id_tbl[qp->sq.head].dpp_wqe = 1; + qp->wqe_wr_id_tbl[qp->sq.head].dpp_wqe_idx = qp->dpp_q.head; + /* if dpp cq is not enabled, we can post + * wqe as soon as we receive and adapter + * takes care of flow control. 
+ */ + if (qp->dpp_cq) + ocrdma_hwq_inc_head(&qp->dpp_q); + } else + qp->wqe_wr_id_tbl[qp->sq.head].dpp_wqe = 0; +} + +/* + * ocrdma_post_send + */ +int ocrdma_post_send(struct ibv_qp *ib_qp, struct ibv_send_wr *wr, + struct ibv_send_wr **bad_wr) +{ + int status = 0; + struct ocrdma_qp *qp; + struct ocrdma_hdr_wqe *hdr; + + qp = get_ocrdma_qp(ib_qp); + + pthread_spin_lock(&qp->q_lock); + if (qp->state != OCRDMA_QPS_RTS && qp->state != OCRDMA_QPS_SQD) { + pthread_spin_unlock(&qp->q_lock); + *bad_wr = wr; + return -EINVAL; + } + + while (wr) { + if (ocrdma_hwq_free_cnt(&qp->sq) == 0 || + wr->num_sge > qp->sq.max_sges) { + *bad_wr = wr; + status = -ENOMEM; + break; + } + hdr = ocrdma_hwq_head(&qp->sq); + hdr->cw = 0; + hdr->total_len = 0; + if (wr->send_flags & IBV_SEND_SIGNALED || qp->signaled) + hdr->cw = (OCRDMA_FLAG_SIG << OCRDMA_WQE_FLAGS_SHIFT); + if (wr->send_flags & IBV_SEND_FENCE) + hdr->cw |= + (OCRDMA_FLAG_FENCE_L << OCRDMA_WQE_FLAGS_SHIFT); + if (wr->send_flags & IBV_SEND_SOLICITED) + hdr->cw |= + (OCRDMA_FLAG_SOLICIT << OCRDMA_WQE_FLAGS_SHIFT); + + qp->wqe_wr_id_tbl[qp->sq.head].wrid = wr->wr_id; + switch (wr->opcode) { + case IBV_WR_SEND_WITH_IMM: + hdr->cw |= (OCRDMA_FLAG_IMM << OCRDMA_WQE_FLAGS_SHIFT); + hdr->immdt = ntohl(wr->imm_data); + case IBV_WR_SEND: + hdr->cw |= (OCRDMA_SEND << OCRDMA_WQE_OPCODE_SHIFT); + status = ocrdma_build_send(qp, hdr, wr); + break; + case IBV_WR_RDMA_WRITE_WITH_IMM: + hdr->cw |= (OCRDMA_FLAG_IMM << OCRDMA_WQE_FLAGS_SHIFT); + hdr->immdt = ntohl(wr->imm_data); + case IBV_WR_RDMA_WRITE: + hdr->cw |= (OCRDMA_WRITE << OCRDMA_WQE_OPCODE_SHIFT); + status = ocrdma_build_write(qp, hdr, wr); + break; + case IBV_WR_RDMA_READ: + ocrdma_build_read(qp, hdr, wr); + break; + default: + status = -EINVAL; + break; + } + if (status) { + *bad_wr = wr; + break; + } + if (wr->send_flags & IBV_SEND_SIGNALED || qp->signaled) + qp->wqe_wr_id_tbl[qp->sq.head].signaled = 1; + else + qp->wqe_wr_id_tbl[qp->sq.head].signaled = 0; + + if 
(qp->dpp_enabled && (wr->send_flags & IBV_SEND_INLINE)) + ocrdma_post_dpp_wqe(qp, hdr); + + ocrdma_swap_cpu_to_le(hdr, ((hdr->cw >> OCRDMA_WQE_SIZE_SHIFT) & + OCRDMA_WQE_SIZE_MASK) * + OCRDMA_WQE_STRIDE); + + wmb(); + ocrdma_ring_sq_db(qp); + + /* update pointer, counter for next wr */ + ocrdma_hwq_inc_head(&qp->sq); + wr = wr->next; + } + pthread_spin_unlock(&qp->q_lock); + + return status; +} + +static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, struct ibv_recv_wr *wr, + uint16_t tag) +{ + struct ocrdma_sge *sge; + uint32_t wqe_size; + + if (wr->num_sge) + wqe_size = (wr->num_sge * sizeof(*sge)) + sizeof(*rqe); + else + wqe_size = sizeof(*sge) + sizeof(*rqe); + + rqe->cw = ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT); + rqe->cw |= (OCRDMA_FLAG_SIG << OCRDMA_WQE_FLAGS_SHIFT); + rqe->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT); + rqe->total_len = 0; + rqe->rsvd_tag = tag; + sge = (struct ocrdma_sge *)(rqe + 1); + ocrdma_build_sges(rqe, sge, wr->num_sge, wr->sg_list); + ocrdma_swap_cpu_to_le(rqe, wqe_size); +} + +/* + * ocrdma_post_recv + */ +int ocrdma_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, + struct ibv_recv_wr **bad_wr) +{ + int status = 0; + struct ocrdma_qp *qp; + struct ocrdma_hdr_wqe *rqe; + + qp = get_ocrdma_qp(ibqp); + + pthread_spin_lock(&qp->q_lock); + if (qp->state == OCRDMA_QPS_RST || qp->state == OCRDMA_QPS_ERR) { + pthread_spin_unlock(&qp->q_lock); + *bad_wr = wr; + return -EINVAL; + } + + while (wr) { + if (ocrdma_hwq_free_cnt(&qp->rq) == 0 || + wr->num_sge > qp->rq.max_sges) { + status = -ENOMEM; + *bad_wr = wr; + break; + } + rqe = ocrdma_hwq_head(&qp->rq); + ocrdma_build_rqe(rqe, wr, 0); + qp->rqe_wr_id_tbl[qp->rq.head] = wr->wr_id; + wmb(); + ocrdma_ring_rq_db(qp); + + /* update pointer, counter for next wr */ + ocrdma_hwq_inc_head(&qp->rq); + wr = wr->next; + } + pthread_spin_unlock(&qp->q_lock); + + return status; +} + +static enum ibv_wc_status ocrdma_to_ibwc_err(uint16_t status) +{ + enum ibv_wc_opcode 
ibwc_status = IBV_WC_GENERAL_ERR; + switch (status) { + case OCRDMA_CQE_GENERAL_ERR: + ibwc_status = IBV_WC_GENERAL_ERR; + break; + case OCRDMA_CQE_LOC_LEN_ERR: + ibwc_status = IBV_WC_LOC_LEN_ERR; + break; + case OCRDMA_CQE_LOC_QP_OP_ERR: + ibwc_status = IBV_WC_LOC_QP_OP_ERR; + break; + case OCRDMA_CQE_LOC_EEC_OP_ERR: + ibwc_status = IBV_WC_LOC_EEC_OP_ERR; + break; + case OCRDMA_CQE_LOC_PROT_ERR: + ibwc_status = IBV_WC_LOC_PROT_ERR; + break; + case OCRDMA_CQE_WR_FLUSH_ERR: + ibwc_status = IBV_WC_WR_FLUSH_ERR; + break; + case OCRDMA_CQE_BAD_RESP_ERR: + ibwc_status = IBV_WC_BAD_RESP_ERR; + break; + case OCRDMA_CQE_LOC_ACCESS_ERR: + ibwc_status = IBV_WC_LOC_ACCESS_ERR; + break; + case OCRDMA_CQE_REM_INV_REQ_ERR: + ibwc_status = IBV_WC_REM_INV_REQ_ERR; + break; + case OCRDMA_CQE_REM_ACCESS_ERR: + ibwc_status = IBV_WC_REM_ACCESS_ERR; + break; + case OCRDMA_CQE_REM_OP_ERR: + ibwc_status = IBV_WC_REM_OP_ERR; + break; + case OCRDMA_CQE_RETRY_EXC_ERR: + ibwc_status = IBV_WC_RETRY_EXC_ERR; + break; + case OCRDMA_CQE_RNR_RETRY_EXC_ERR: + ibwc_status = IBV_WC_RNR_RETRY_EXC_ERR; + break; + case OCRDMA_CQE_LOC_RDD_VIOL_ERR: + ibwc_status = IBV_WC_LOC_RDD_VIOL_ERR; + break; + case OCRDMA_CQE_REM_INV_RD_REQ_ERR: + ibwc_status = IBV_WC_REM_INV_RD_REQ_ERR; + break; + case OCRDMA_CQE_REM_ABORT_ERR: + ibwc_status = IBV_WC_REM_ABORT_ERR; + break; + case OCRDMA_CQE_INV_EECN_ERR: + ibwc_status = IBV_WC_INV_EECN_ERR; + break; + case OCRDMA_CQE_INV_EEC_STATE_ERR: + ibwc_status = IBV_WC_INV_EEC_STATE_ERR; + break; + case OCRDMA_CQE_FATAL_ERR: + ibwc_status = IBV_WC_FATAL_ERR; + break; + case OCRDMA_CQE_RESP_TIMEOUT_ERR: + ibwc_status = IBV_WC_RESP_TIMEOUT_ERR; + break; + default: + ibwc_status = IBV_WC_GENERAL_ERR; + break; + }; + return ibwc_status; +} + +static void ocrdma_update_wc(struct ocrdma_qp *qp, struct ibv_wc *ibwc, + uint32_t wqe_idx) +{ + struct ocrdma_hdr_wqe *hdr; + struct ocrdma_sge *rw; + int opcode; + + hdr = ocrdma_hwq_head_from_idx(&qp->sq, wqe_idx); + + ibwc->wr_id = 
qp->wqe_wr_id_tbl[wqe_idx].wrid; + + /* Undo the hdr->cw swap */ + opcode = ocrdma_le_to_cpu(hdr->cw) & OCRDMA_WQE_OPCODE_MASK; + switch (opcode) { + case OCRDMA_WRITE: + ibwc->opcode = IBV_WC_RDMA_WRITE; + break; + case OCRDMA_READ: + rw = (struct ocrdma_sge *)(hdr + 1); + ibwc->opcode = IBV_WC_RDMA_READ; + ibwc->byte_len = rw->len; + break; + case OCRDMA_SEND: + ibwc->opcode = IBV_WC_SEND; + break; + default: + ibwc->status = IBV_WC_GENERAL_ERR; + ocrdma_err("%s() invalid opcode received = 0x%x\n", + __func__, hdr->cw & OCRDMA_WQE_OPCODE_MASK); + break; + }; +} + +static void ocrdma_set_cqe_status_flushed(struct ocrdma_qp *qp, + struct ocrdma_cqe *cqe) +{ + if (is_cqe_for_sq(cqe)) { + cqe->flags_status_srcqpn = + ocrdma_cpu_to_le(ocrdma_le_to_cpu(cqe->flags_status_srcqpn) + & ~OCRDMA_CQE_STATUS_MASK); + cqe->flags_status_srcqpn = + ocrdma_cpu_to_le(ocrdma_le_to_cpu(cqe->flags_status_srcqpn) + | (OCRDMA_CQE_WR_FLUSH_ERR << + OCRDMA_CQE_STATUS_SHIFT)); + } else { + if (qp->qp_type == IBV_QPT_UD) { + cqe->flags_status_srcqpn = + ocrdma_cpu_to_le(ocrdma_le_to_cpu + (cqe->flags_status_srcqpn) & + ~OCRDMA_CQE_UD_STATUS_MASK); + cqe->flags_status_srcqpn = + ocrdma_cpu_to_le(ocrdma_le_to_cpu + (cqe->flags_status_srcqpn) | + (OCRDMA_CQE_WR_FLUSH_ERR << + OCRDMA_CQE_UD_STATUS_SHIFT)); + } else { + cqe->flags_status_srcqpn = + ocrdma_cpu_to_le(ocrdma_le_to_cpu + (cqe->flags_status_srcqpn) & + ~OCRDMA_CQE_STATUS_MASK); + cqe->flags_status_srcqpn = + ocrdma_cpu_to_le(ocrdma_le_to_cpu + (cqe->flags_status_srcqpn) | + (OCRDMA_CQE_WR_FLUSH_ERR << + OCRDMA_CQE_STATUS_SHIFT)); + } + } +} + +static int ocrdma_update_err_cqe(struct ibv_wc *ibwc, struct ocrdma_cqe *cqe, + struct ocrdma_qp *qp, int status) +{ + int expand = 0; + + ibwc->byte_len = 0; + ibwc->qp_num = qp->id; + ibwc->status = ocrdma_to_ibwc_err(status); + + ocrdma_flush_qp(qp); + ocrdma_qp_state_machine(qp, IBV_QPS_ERR); + + /* if wqe/rqe pending for which cqe needs to be returned, + * trigger inflating it. 
+ */ + if (!is_hw_rq_empty(qp) || !is_hw_sq_empty(qp)) { + expand = 1; + ocrdma_set_cqe_status_flushed(qp, cqe); + } + return expand; +} + +static int ocrdma_update_err_rcqe(struct ibv_wc *ibwc, struct ocrdma_cqe *cqe, + struct ocrdma_qp *qp, int status) +{ + ibwc->opcode = IBV_WC_RECV; + ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail]; + ocrdma_hwq_inc_tail(&qp->rq); + + return ocrdma_update_err_cqe(ibwc, cqe, qp, status); +} + +static int ocrdma_update_err_scqe(struct ibv_wc *ibwc, struct ocrdma_cqe *cqe, + struct ocrdma_qp *qp, int status) +{ + ocrdma_update_wc(qp, ibwc, qp->sq.tail); + ocrdma_hwq_inc_tail(&qp->sq); + + return ocrdma_update_err_cqe(ibwc, cqe, qp, status); +} + +static int ocrdma_poll_err_scqe(struct ocrdma_qp *qp, + struct ocrdma_cqe *cqe, struct ibv_wc *ibwc, + int *polled, int *stop) +{ + int expand; + int status = (ocrdma_le_to_cpu(cqe->flags_status_srcqpn) & + OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT; + + /* when hw sq is empty, but rq is not empty, so we continue + * to keep the cqe in order to get the cq event again. + */ + if (is_hw_sq_empty(qp) && !is_hw_rq_empty(qp)) { + /* when cq for rq and sq is same, it is safe to return + * flush cqe for RQEs. 
+ */ + if (!qp->srq && (qp->sq_cq == qp->rq_cq)) { + *polled = 1; + status = OCRDMA_CQE_WR_FLUSH_ERR; + expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status); + } else { + *polled = 0; + *stop = 1; + expand = 0; + } + } else { + *polled = 1; + expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status); + } + return expand; +} + +static int ocrdma_poll_success_scqe(struct ocrdma_qp *qp, + struct ocrdma_cqe *cqe, + struct ibv_wc *ibwc, int *polled) +{ + int expand = 0; + int tail = qp->sq.tail; + uint32_t wqe_idx; + + if (!qp->wqe_wr_id_tbl[tail].signaled) { + *polled = 0; /* WC cannot be consumed yet */ + } else { + ibwc->status = IBV_WC_SUCCESS; + ibwc->wc_flags = 0; + ibwc->qp_num = qp->id; + ocrdma_update_wc(qp, ibwc, tail); + *polled = 1; + } + + wqe_idx = (ocrdma_le_to_cpu(cqe->wq.wqeidx) & + OCRDMA_CQE_WQEIDX_MASK) & qp->sq.max_wqe_idx; + if (tail != wqe_idx) /* CQE cannot be consumed yet */ + expand = 1; /* Coallesced CQE */ + + ocrdma_hwq_inc_tail(&qp->sq); + return expand; +} + +static int ocrdma_poll_scqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe, + struct ibv_wc *ibwc, int *polled, int *stop) +{ + int status, expand; + + status = (ocrdma_le_to_cpu(cqe->flags_status_srcqpn) & + OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT; + + if (status == OCRDMA_CQE_SUCCESS) + expand = ocrdma_poll_success_scqe(qp, cqe, ibwc, polled); + else + expand = ocrdma_poll_err_scqe(qp, cqe, ibwc, polled, stop); + return expand; +} + +static int ocrdma_update_ud_rcqe(struct ibv_wc *ibwc, struct ocrdma_cqe *cqe) +{ + int status; + + status = (ocrdma_le_to_cpu(cqe->flags_status_srcqpn) & + OCRDMA_CQE_UD_STATUS_MASK) >> OCRDMA_CQE_UD_STATUS_SHIFT; + ibwc->src_qp = ocrdma_le_to_cpu(cqe->flags_status_srcqpn) & + OCRDMA_CQE_SRCQP_MASK; + ibwc->pkey_index = ocrdma_le_to_cpu(cqe->ud.rxlen_pkey) & + OCRDMA_CQE_PKEY_MASK; + ibwc->wc_flags = IBV_WC_GRH; + ibwc->byte_len = (ocrdma_le_to_cpu(cqe->ud.rxlen_pkey) >> + OCRDMA_CQE_UD_XFER_LEN_SHIFT); + return status; +} + +static void 
ocrdma_update_free_srq_cqe(struct ibv_wc *ibwc, + struct ocrdma_cqe *cqe, + struct ocrdma_qp *qp) +{ + struct ocrdma_srq *srq = 0; + uint32_t wqe_idx; + + srq = get_ocrdma_srq(qp->ibv_qp.srq); +#if !defined(SKH_A0_WORKAROUND) /* BUG 113416 */ + wqe_idx = (ocrdma_le_to_cpu(cqe->rq.buftag_qpn) >> + OCRDMA_CQE_BUFTAG_SHIFT) & srq->rq.max_wqe_idx; +#else + wqe_idx = (ocrdma_le_to_cpu(cqe->flags_status_srcqpn)) & 0xFFFF; +#endif + ibwc->wr_id = srq->rqe_wr_id_tbl[wqe_idx]; + pthread_spin_lock(&srq->q_lock); + ocrdma_srq_toggle_bit(srq, wqe_idx); + pthread_spin_unlock(&srq->q_lock); + ocrdma_hwq_inc_tail(&srq->rq); +} + +static int ocrdma_poll_err_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe, + struct ibv_wc *ibwc, int *polled, int *stop, + int status) +{ + int expand; + + /* when hw_rq is empty, but wq is not empty, so continue + * to keep the cqe to get the cq event again. + */ + if (is_hw_rq_empty(qp) && !is_hw_sq_empty(qp)) { + if (!qp->srq && (qp->sq_cq == qp->rq_cq)) { + *polled = 1; + status = OCRDMA_CQE_WR_FLUSH_ERR; + expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status); + } else { + *polled = 0; + *stop = 1; + expand = 0; + } + } else { + *polled = 1; + expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status); + } + return expand; +} + +static void ocrdma_poll_success_rcqe(struct ocrdma_qp *qp, + struct ocrdma_cqe *cqe, + struct ibv_wc *ibwc) +{ + ibwc->opcode = IBV_WC_RECV; + ibwc->qp_num = qp->id; + ibwc->status = IBV_WC_SUCCESS; + + if (qp->qp_type == IBV_QPT_UD) + ocrdma_update_ud_rcqe(ibwc, cqe); + else + ibwc->byte_len = ocrdma_le_to_cpu(cqe->rq.rxlen); + + if (is_cqe_imm(cqe)) { + ibwc->imm_data = htonl(ocrdma_le_to_cpu(cqe->rq.lkey_immdt)); + ibwc->wc_flags |= IBV_WC_WITH_IMM; + } else if (is_cqe_wr_imm(cqe)) { + ibwc->opcode = IBV_WC_RECV_RDMA_WITH_IMM; + ibwc->imm_data = htonl(ocrdma_le_to_cpu(cqe->rq.lkey_immdt)); + ibwc->wc_flags |= IBV_WC_WITH_IMM; + } + if (qp->ibv_qp.srq) + ocrdma_update_free_srq_cqe(ibwc, cqe, qp); + else { + ibwc->wr_id = 
qp->rqe_wr_id_tbl[qp->rq.tail]; + ocrdma_hwq_inc_tail(&qp->rq); + } +} + +static int ocrdma_poll_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe, + struct ibv_wc *ibwc, int *polled, int *stop) +{ + int status; + int expand = 0; + + ibwc->wc_flags = 0; + if (qp->qp_type == IBV_QPT_UD) + status = (ocrdma_le_to_cpu(cqe->flags_status_srcqpn) & + OCRDMA_CQE_UD_STATUS_MASK) >> OCRDMA_CQE_UD_STATUS_SHIFT; + else + status = (ocrdma_le_to_cpu(cqe->flags_status_srcqpn) & + OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT; + + if (status == OCRDMA_CQE_SUCCESS) { + *polled = 1; + ocrdma_poll_success_rcqe(qp, cqe, ibwc); + } else { + expand = ocrdma_poll_err_rcqe(qp, cqe, ibwc, polled, stop, + status); + } + return expand; +} + +static void ocrdma_change_cq_phase(struct ocrdma_cq *cq, + struct ocrdma_cqe *cqe, uint16_t cur_getp) +{ + if (cq->phase_change) { + if (cur_getp == 0) + cq->phase = (~cq->phase & OCRDMA_CQE_VALID); + } else + cqe->flags_status_srcqpn = 0; /* clear valid bit */ +} + +static int ocrdma_poll_hwcq(struct ocrdma_cq *cq, int num_entries, + struct ibv_wc *ibwc) +{ + uint16_t qpn = 0; + int i = 0; + int expand = 0; + int polled_hw_cqes = 0; + struct ocrdma_qp *qp = NULL; + struct ocrdma_device *dev = cq->dev; + struct ocrdma_cqe *cqe; + uint16_t cur_getp; + int polled = 0; + int stop = 0; + + cur_getp = cq->getp; + while (num_entries) { + cqe = cq->va + cur_getp; + /* check whether valid cqe or not */ + if (!is_cqe_valid(cq, cqe)) + break; + qpn = (ocrdma_le_to_cpu(cqe->cmn.qpn) & OCRDMA_CQE_QPN_MASK); + /* ignore discarded cqe */ + if (qpn == 0) + goto skip_cqe; + qp = dev->qp_tbl[qpn]; + if (qp == NULL) { + ocrdma_err("%s() cqe for invalid qpn= 0x%x received.\n", + __func__, qpn); + goto skip_cqe; + } + + if (is_cqe_for_sq(cqe)) { + expand = ocrdma_poll_scqe(qp, cqe, ibwc, &polled, + &stop); + } else { + expand = ocrdma_poll_rcqe(qp, cqe, ibwc, &polled, + &stop); + } + if (expand) + goto expand_cqe; + if (stop) + goto stop_cqe; + /* clear qpn to avoid 
duplicate processing by discard_cqe() */ + cqe->cmn.qpn = 0; +skip_cqe: + polled_hw_cqes += 1; + cur_getp = (cur_getp + 1) % cq->max_hw_cqe; + ocrdma_change_cq_phase(cq, cqe, cur_getp); +expand_cqe: + if (polled) { + num_entries -= 1; + i += 1; + ibwc = ibwc + 1; + polled = 0; + } + } +stop_cqe: + cq->getp = cur_getp; + if (polled_hw_cqes || expand || stop) + ocrdma_ring_cq_db(cq, cq->armed, cq->solicited, polled_hw_cqes); + return i; +} + +static int ocrdma_add_err_cqe(struct ocrdma_cq *cq, int num_entries, + struct ocrdma_qp *qp, struct ibv_wc *ibwc) +{ + int err_cqes = 0; + + while (num_entries) { + if (is_hw_sq_empty(qp) && is_hw_rq_empty(qp)) + break; + if (!is_hw_sq_empty(qp) && qp->sq_cq == cq) { + ocrdma_update_wc(qp, ibwc, qp->sq.tail); + ocrdma_hwq_inc_tail(&qp->sq); + } else if (!is_hw_rq_empty(qp) && qp->rq_cq == cq) { + ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail]; + ocrdma_hwq_inc_tail(&qp->rq); + } else + return err_cqes; + ibwc->byte_len = 0; + ibwc->status = IBV_WC_WR_FLUSH_ERR; + ibwc = ibwc + 1; + err_cqes += 1; + num_entries -= 1; + } + return err_cqes; +} + +/* + * ocrdma_poll_cq + */ +int ocrdma_poll_cq(struct ibv_cq *ibcq, int num_entries, struct ibv_wc *wc) +{ + struct ocrdma_cq *cq; + int cqes_to_poll = num_entries; + int num_os_cqe = 0, err_cqes = 0; + struct ocrdma_qp *qp; + struct ocrdma_list_node *cur, *tmp; + + cq = get_ocrdma_cq(ibcq); + pthread_spin_lock(&cq->cq_lock); + num_os_cqe = ocrdma_poll_hwcq(cq, num_entries, wc); + pthread_spin_unlock(&cq->cq_lock); + cqes_to_poll -= num_os_cqe; + + if (cqes_to_poll) { + wc = wc + num_os_cqe; + pthread_spin_lock(&cq->dev->flush_q_lock); + list_for_each_node_safe(cur, tmp, &cq->sq_head) { + qp = list_node(cur, struct ocrdma_qp, sq_entry); + if (cqes_to_poll == 0) + break; + err_cqes = ocrdma_add_err_cqe(cq, cqes_to_poll, qp, wc); + cqes_to_poll -= err_cqes; + num_os_cqe += err_cqes; + wc = wc + err_cqes; + } + pthread_spin_unlock(&cq->dev->flush_q_lock); + } + return num_os_cqe; +} + +/* + * 
ocrdma_arm_cq + */ +int ocrdma_arm_cq(struct ibv_cq *ibcq, int solicited) +{ + struct ocrdma_cq *cq; + uint16_t cur_getp; + struct ocrdma_cqe *cqe; + + cq = get_ocrdma_cq(ibcq); + pthread_spin_lock(&cq->cq_lock); + + cur_getp = cq->getp; + cqe = cq->va + cur_getp; + + cq->armed = 1; + cq->solicited = solicited; + /* check whether any valid cqe exist or not, if not then safe to + * arm. If cqe is not yet consumed, then let it get consumed and then + * we arm it to avoid 0 interrupts. + */ + if (!is_cqe_valid(cq, cqe) || cq->arm_needed) { + cq->arm_needed = 0; + ocrdma_ring_cq_db(cq, cq->armed, cq->solicited, 0); + } + pthread_spin_unlock(&cq->cq_lock); + + return 0; +} + +void ocrdma_cq_handler(struct ibv_cq *ibcq) +{ + struct ocrdma_cq *cq; + + cq = get_ocrdma_cq(ibcq); + pthread_spin_lock(&cq->cq_lock); + cq->armed = 0; + cq->solicited = 0; + ocrdma_ring_cq_db(cq, cq->armed, cq->solicited, 0); + pthread_spin_unlock(&cq->cq_lock); + +} + +/* + * ocrdma_post_srq_recv + */ +int ocrdma_post_srq_recv(struct ibv_srq *ibsrq, struct ibv_recv_wr *wr, + struct ibv_recv_wr **bad_wr) +{ + int status = 0; + uint16_t tag; + struct ocrdma_srq *srq; + struct ocrdma_hdr_wqe *rqe; + + srq = get_ocrdma_srq(ibsrq); + pthread_spin_lock(&srq->q_lock); + while (wr) { + if (ocrdma_hwq_free_cnt(&srq->rq) == 0 || + wr->num_sge > srq->rq.max_sges) { + status = -ENOMEM; + *bad_wr = wr; + break; + } + rqe = ocrdma_hwq_head(&srq->rq); + tag = ocrdma_srq_get_idx(srq); + ocrdma_build_rqe(rqe, wr, tag); + srq->rqe_wr_id_tbl[tag] = wr->wr_id; + + wmb(); + ocrdma_ring_srq_db(srq); + + /* update pointer, counter for next wr */ + ocrdma_hwq_inc_head(&srq->rq); + wr = wr->next; + } + pthread_spin_unlock(&srq->q_lock); + return status; +} + +/* + * ocrdma_create_ah + */ +struct ibv_ah *ocrdma_create_ah(struct ibv_pd *ibpd, struct ibv_ah_attr *attr) +{ + int status; + int ahtbl_idx; + struct ocrdma_pd *pd; + struct ocrdma_ah *ah; + + pd = get_ocrdma_pd(ibpd); + ah = malloc(sizeof *ah); + if (!ah) + 
return NULL; + bzero(ah, sizeof *ah); + ah->pd = pd; + + ahtbl_idx = ocrdma_alloc_ah_tbl_id(pd->uctx); + if (ahtbl_idx < 0) + goto tbl_err; + attr->dlid = ahtbl_idx; + status = ibv_cmd_create_ah(ibpd, &ah->ibv_ah, attr); + if (status) + goto cmd_err; + + ah->id = pd->uctx->ah_tbl[ahtbl_idx]; + return &ah->ibv_ah; + +cmd_err: + ocrdma_free_ah_tbl_id(pd->uctx, ahtbl_idx); +tbl_err: + free(ah); + return NULL; +} + +/* + * ocrdma_destroy_ah + */ +int ocrdma_destroy_ah(struct ibv_ah *ibah) +{ + int status; + struct ocrdma_ah *ah; + struct ocrdma_device *dev; + ah = get_ocrdma_ah(ibah); + dev = ah->pd->dev; + status = ibv_cmd_destroy_ah(ibah); + ocrdma_free_ah_tbl_id(ah->pd->uctx, ah->id); + free(ah); + return status; +} + +/* + * ocrdma_attach_mcast + */ +int ocrdma_attach_mcast(struct ibv_qp *ibqp, const union ibv_gid *gid, + uint16_t lid) +{ + int status; + struct ocrdma_qp *qp; + qp = get_ocrdma_qp(ibqp); + status = ibv_cmd_attach_mcast(ibqp, gid, lid); + return status; +} + +/* + * ocrdma_detach_mcast + */ +int ocrdma_detach_mcast(struct ibv_qp *ibqp, const union ibv_gid *gid, + uint16_t lid) +{ + int status; + struct ocrdma_qp *qp; + qp = get_ocrdma_qp(ibqp); + status = ibv_cmd_detach_mcast(ibqp, gid, lid); + return status; +} + +void ocrdma_async_event(struct ibv_async_event *event) +{ + struct ocrdma_cq *cq = NULL; + struct ocrdma_qp *qp = NULL; + switch (event->event_type) { + case IBV_EVENT_CQ_ERR: + cq = get_ocrdma_cq(event->element.cq); + break; + case IBV_EVENT_QP_FATAL: + case IBV_EVENT_QP_REQ_ERR: + case IBV_EVENT_QP_ACCESS_ERR: + case IBV_EVENT_PATH_MIG_ERR:{ + qp = get_ocrdma_qp(event->element.qp); + break; + } + case IBV_EVENT_SQ_DRAINED: + case IBV_EVENT_PATH_MIG: + case IBV_EVENT_COMM_EST: + case IBV_EVENT_QP_LAST_WQE_REACHED: + break; + case IBV_EVENT_PORT_ACTIVE: + case IBV_EVENT_PORT_ERR: + break; + default: + break; + } +} -- 2.46.0