From 93c6b6d68a85d5c2b0c28819b45461411f249efc Mon Sep 17 00:00:00 2001 From: Mark Abraham Date: Mon, 15 Jan 2018 14:01:44 +0100 Subject: [PATCH] Remove support for implicit solvation Mdp files with implicit-solvent = no can still be read, and formerly valid related fields are now ignored, so that default mdp files from previous versions of GROMACS will work. Anything else for the implicit-solvent mdp value gives an error in grompp. grompp can now only write a tpr file that has a false value for ir->implicit_solvent, but can read older versions. When mdrun is presented with an older tpr file that did such a simulation, it refuses to run, presenting a useful error message. Such tpr files are still useful for other purposes, so can still be read, except that the fields specific to these methods are ignored. grompp now ignores the topology directives for related parameters, which means that force-field folders that are the same as, or modifications of folders formerly supported by GROMACS still work. However, the versions currently distributed have none of those fields. The group-scheme kernels have been removed, and generation infrastructure updated so that they do generate the code that's in the repo. However, now that the python generation scripts no longer generate GB kernels, the dictionary ordering changes, which changes the generated output. That output is not sensitive to the order of the declarations or data-structure elements, so this is only a cosmetic issue. Documentation has been removed. Unit tests on .mdp file handling have had to be updated. 
Also removed unused enbcoul enumeration Refs #1500 Refs #1971 Fixes #1054 Change-Id: Ib241555ff3d8e60012ba0e628ab0f9a3f91eca9e --- docs/manual/algorithms.tex | 68 - docs/manual/forcefield.tex | 2 +- docs/manual/topology.tex | 32 - docs/user-guide/cutoff-schemes.rst | 1 - docs/user-guide/mdp-options.rst | 100 +- docs/user-guide/mdrun-performance.rst | 1 - docs/user-guide/terminology.rst | 2 +- src/gromacs/domdec/domdec.cpp | 6 - src/gromacs/fileio/tpxio.cpp | 134 +- .../gmxlib/nonbonded/nb_free_energy.cpp | 6 +- src/gromacs/gmxlib/nonbonded/nb_generic.cpp | 7 +- src/gromacs/gmxlib/nonbonded/nb_kernel.h | 1 - .../make_nb_kernel_avx_128_fma_double.py | 6 +- ...cGB_VdwCSTab_GeomP1P1_avx_128_fma_double.c | 858 --------- ...ElecGB_VdwLJ_GeomP1P1_avx_128_fma_double.c | 736 ------- ...ecGB_VdwNone_GeomP1P1_avx_128_fma_double.c | 665 ------- .../nb_kernel_avx_128_fma_double.c | 256 ++- .../nb_kernel_template_avx_128_fma_double.pre | 115 +- .../make_nb_kernel_avx_128_fma_single.py | 6 +- ...cGB_VdwCSTab_GeomP1P1_avx_128_fma_single.c | 978 ---------- ...ElecGB_VdwLJ_GeomP1P1_avx_128_fma_single.c | 864 --------- ...ecGB_VdwNone_GeomP1P1_avx_128_fma_single.c | 769 -------- .../nb_kernel_avx_128_fma_single.c | 256 ++- .../nb_kernel_template_avx_128_fma_single.pre | 111 +- .../make_nb_kernel_avx_256_double.py | 6 +- ..._ElecGB_VdwCSTab_GeomP1P1_avx_256_double.c | 972 ---------- ...nel_ElecGB_VdwLJ_GeomP1P1_avx_256_double.c | 870 --------- ...l_ElecGB_VdwNone_GeomP1P1_avx_256_double.c | 775 -------- .../nb_kernel_avx_256_double.c | 256 ++- .../nb_kernel_template_avx_256_double.pre | 107 +- .../make_nb_kernel_avx_256_single.py | 6 +- ..._ElecGB_VdwCSTab_GeomP1P1_avx_256_single.c | 1192 ------------ ...nel_ElecGB_VdwLJ_GeomP1P1_avx_256_single.c | 1038 ---------- ...l_ElecGB_VdwNone_GeomP1P1_avx_256_single.c | 911 --------- .../nb_kernel_avx_256_single.c | 256 ++- .../nb_kernel_template_avx_256_single.pre | 126 +- .../nonbonded/nb_kernel_c/make_nb_kernel_c.py | 6 +- 
.../nb_kernel_ElecGB_VdwBham_GeomP1P1_c.c | 505 ----- .../nb_kernel_ElecGB_VdwCSTab_GeomP1P1_c.c | 545 ------ .../nb_kernel_ElecGB_VdwLJ_GeomP1P1_c.c | 501 ----- .../nb_kernel_ElecGB_VdwNone_GeomP1P1_c.c | 459 ----- .../nb_kernel_c/nb_kernel_allvsallgb.cpp | 527 ----- .../nb_kernel_c/nb_kernel_allvsallgb.h | 55 - .../nonbonded/nb_kernel_c/nb_kernel_c.c | 338 ++-- .../nb_kernel_c/nb_kernel_template_c.pre | 81 +- .../make_nb_kernel_sparc64_hpc_ace_double.py | 6 +- ...VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.c | 820 -------- ...GB_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.c | 706 ------- ..._VdwNone_GeomP1P1_sparc64_hpc_ace_double.c | 633 ------ .../nb_kernel_sparc64_hpc_ace_double.c | 256 ++- ...kernel_template_sparc64_hpc_ace_double.pre | 107 +- .../make_nb_kernel_sse2_double.py | 6 +- ...nel_ElecGB_VdwCSTab_GeomP1P1_sse2_double.c | 838 -------- ...kernel_ElecGB_VdwLJ_GeomP1P1_sse2_double.c | 728 ------- ...rnel_ElecGB_VdwNone_GeomP1P1_sse2_double.c | 657 ------- .../nb_kernel_sse2_double.c | 256 ++- .../nb_kernel_template_sse2_double.pre | 111 +- .../make_nb_kernel_sse2_single.py | 6 +- ...nel_ElecGB_VdwCSTab_GeomP1P1_sse2_single.c | 958 --------- ...kernel_ElecGB_VdwLJ_GeomP1P1_sse2_single.c | 856 -------- ...rnel_ElecGB_VdwNone_GeomP1P1_sse2_single.c | 761 -------- .../nb_kernel_sse2_single.c | 256 ++- .../nb_kernel_template_sse2_single.pre | 107 +- .../make_nb_kernel_sse4_1_double.py | 6 +- ...l_ElecGB_VdwCSTab_GeomP1P1_sse4_1_double.c | 838 -------- ...rnel_ElecGB_VdwLJ_GeomP1P1_sse4_1_double.c | 728 ------- ...el_ElecGB_VdwNone_GeomP1P1_sse4_1_double.c | 657 ------- .../nb_kernel_sse4_1_double.c | 256 ++- .../nb_kernel_template_sse4_1_double.pre | 111 +- .../make_nb_kernel_sse4_1_single.py | 6 +- ...l_ElecGB_VdwCSTab_GeomP1P1_sse4_1_single.c | 954 --------- ...rnel_ElecGB_VdwLJ_GeomP1P1_sse4_1_single.c | 852 -------- ...el_ElecGB_VdwNone_GeomP1P1_sse4_1_single.c | 757 -------- .../nb_kernel_sse4_1_single.c | 256 ++- .../nb_kernel_template_sse4_1_single.pre | 106 
+- src/gromacs/gmxlib/nonbonded/nonbonded.cpp | 3 +- src/gromacs/gmxlib/nrnb.cpp | 14 +- src/gromacs/gmxlib/nrnb.h | 12 +- src/gromacs/gmxpreprocess/convparm.cpp | 34 +- src/gromacs/gmxpreprocess/gpp_atomtype.cpp | 156 +- src/gromacs/gmxpreprocess/gpp_atomtype.h | 20 +- src/gromacs/gmxpreprocess/grompp.cpp | 87 +- src/gromacs/gmxpreprocess/nm2type.cpp | 4 +- src/gromacs/gmxpreprocess/readir.cpp | 117 +- src/gromacs/gmxpreprocess/resall.cpp | 4 +- src/gromacs/gmxpreprocess/tests/readir.cpp | 12 + .../refdata/GetIrTest_EmptyInputWorks.xml | 26 +- ...IrTest_HandlesDifferentKindsOfMdpLines.xml | 26 +- .../GetIrTest_HandlesOnlyCutoffScheme.xml | 26 +- .../GetIrTest_ImplicitSolventNoWorks.xml | 321 +++ ...IrTest_ProducesOutputFromElectricField.xml | 26 +- ...ucesOutputFromElectricFieldOscillating.xml | 26 +- ..._ProducesOutputFromElectricFieldPulsed.xml | 26 +- .../GetIrTest_UserErrorsSilentlyTolerated.xml | 26 +- src/gromacs/gmxpreprocess/topdirs.cpp | 4 +- src/gromacs/gmxpreprocess/topio.cpp | 199 +- src/gromacs/gmxpreprocess/topio.h | 3 +- src/gromacs/gmxpreprocess/toppush.cpp | 82 +- src/gromacs/gmxpreprocess/toppush.h | 7 +- src/gromacs/gmxpreprocess/toputil.cpp | 4 +- src/gromacs/listed-forces/listed-forces.cpp | 5 +- src/gromacs/mdlib/broadcaststructs.cpp | 18 +- src/gromacs/mdlib/force.cpp | 37 +- src/gromacs/mdlib/force.h | 6 +- src/gromacs/mdlib/forcerec.cpp | 68 +- src/gromacs/mdlib/forcerec.h | 4 +- src/gromacs/mdlib/genborn.cpp | 1713 ----------------- src/gromacs/mdlib/genborn.h | 170 -- src/gromacs/mdlib/genborn_allvsall.cpp | 1108 ----------- src/gromacs/mdlib/genborn_allvsall.h | 73 - src/gromacs/mdlib/mdebin.cpp | 14 +- src/gromacs/mdlib/minimize.cpp | 7 +- src/gromacs/mdlib/shellfc.cpp | 7 +- src/gromacs/mdlib/shellfc.h | 3 +- src/gromacs/mdlib/sim_util.cpp | 22 +- src/gromacs/mdlib/tpi.cpp | 4 +- src/gromacs/mdtypes/forcerec.h | 50 +- src/gromacs/mdtypes/inputrec.cpp | 33 +- src/gromacs/mdtypes/inputrec.h | 15 +- src/gromacs/mdtypes/md_enums.cpp | 18 
+- src/gromacs/mdtypes/md_enums.h | 37 +- src/gromacs/tables/forcetable.cpp | 70 +- src/gromacs/tables/forcetable.h | 9 +- src/gromacs/timing/wallcycle.cpp | 2 +- src/gromacs/timing/wallcycle.h | 4 +- src/gromacs/topology/atoms.cpp | 17 +- src/gromacs/topology/atoms.h | 7 +- src/gromacs/topology/idef.cpp | 14 +- src/gromacs/topology/idef.h | 15 +- src/gromacs/topology/ifunc.cpp | 12 +- src/programs/mdrun/md.cpp | 13 +- 131 files changed, 1870 insertions(+), 32077 deletions(-) delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_128_fma_double.c delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_128_fma_double.c delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_128_fma_double.c delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_128_fma_single.c delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_128_fma_single.c delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_128_fma_single.c delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_256_double.c delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_256_double.c delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_256_double.c delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_256_single.c delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_256_single.c delete mode 100644 
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_256_single.c delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwBham_GeomP1P1_c.c delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_c.c delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwLJ_GeomP1P1_c.c delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwNone_GeomP1P1_c.c delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsallgb.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsallgb.h delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.c delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.c delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_sparc64_hpc_ace_double.c delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse2_double.c delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sse2_double.c delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_sse2_double.c delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse2_single.c delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sse2_single.c delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_sse2_single.c delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse4_1_double.c delete mode 100644 
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sse4_1_double.c delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_sse4_1_double.c delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse4_1_single.c delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sse4_1_single.c delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_sse4_1_single.c create mode 100644 src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_ImplicitSolventNoWorks.xml delete mode 100644 src/gromacs/mdlib/genborn.cpp delete mode 100644 src/gromacs/mdlib/genborn.h delete mode 100644 src/gromacs/mdlib/genborn_allvsall.cpp delete mode 100644 src/gromacs/mdlib/genborn_allvsall.h diff --git a/docs/manual/algorithms.tex b/docs/manual/algorithms.tex index 514ae907c0..be04de79c9 100644 --- a/docs/manual/algorithms.tex +++ b/docs/manual/algorithms.tex @@ -3051,74 +3051,6 @@ and separate PME-mesh ranks. \end{figure} -\section{Implicit solvation\index{implicit solvation}\index{Generalized Born methods}} -\label{sec:gbsa} -Implicit solvent models provide an efficient way of representing -the electrostatic effects of solvent molecules, while saving a -large piece of the computations involved in an accurate, aqueous -description of the surrounding water in molecular dynamics simulations. -Implicit solvation models offer several advantages compared with -explicit solvation, including eliminating the need for the equilibration of water -around the solute, and the absence of viscosity, which allows the protein -to more quickly explore conformational space. 
- -Implicit solvent calculations in {\gromacs} can be done using the -generalized Born-formalism, and the Still~\cite{Still97}, HCT~\cite{Truhlar96}, -and OBC~\cite{Case04} models are available for calculating the Born radii. - -Here, the free energy $G_{\mathrm{solv}}$ of solvation is the sum of three terms, -a solvent-solvent cavity term ($G_{\mathrm{cav}}$), a solute-solvent van der -Waals term ($G_{\mathrm{vdw}}$), and finally a solvent-solute electrostatics -polarization term ($G_{\mathrm{pol}}$). - -The sum of $G_{\mathrm{cav}}$ and $G_{\mathrm{vdw}}$ corresponds to the (non-polar) -free energy of solvation for a molecule from which all charges -have been removed, and is commonly called $G_{\mathrm{np}}$, -calculated from the total solvent accessible surface area -multiplied with a surface tension. -The total expression for the solvation free energy then becomes: - -\beq -G_{\mathrm{solv}} = G_{\mathrm{np}} + G_{\mathrm{pol}} -\label{eqn:gb_solv} -\eeq - -Under the generalized Born model, $G_{\mathrm{pol}}$ is calculated from the generalized Born equation~\cite{Still97}: - -\beq -G_{\mathrm{pol}} = \left(1-\frac{1}{\epsilon}\right) \sum_{i=1}^n \sum_{j>i}^n \frac {q_i q_j}{\sqrt{r^2_{ij} + b_i b_j \exp\left(\frac{-r^2_{ij}}{4 b_i b_j}\right)}} -\label{eqn:gb_still} -\eeq - -In {\gromacs}, we have introduced the substitution~\cite{Larsson10}: - -\beq -c_i=\frac{1}{\sqrt{b_i}} -\label{eqn:gb_subst} -\eeq - -which makes it possible to introduce a cheap transformation to a new -variable $x$ when evaluating each interaction, such that: - -\beq -x=\frac{r_{ij}}{\sqrt{b_i b_j }} = r_{ij} c_i c_j -\label{eqn:gb_subst2} -\eeq - -In the end, the full re-formulation of~\ref{eqn:gb_still} becomes: - -\beq -G_{\mathrm{pol}} = \left(1-\frac{1}{\epsilon}\right) \sum_{i=1}^n \sum_{j>i}^n \frac{q_i q_j}{\sqrt{b_i b_j}} ~\xi (x) = \left(1-\frac{1}{\epsilon}\right) \sum_{i=1}^n q_i c_i \sum_{j>i}^n q_j c_j~\xi (x) -\label{eqn:gb_final} -\eeq - -The non-polar part 
($G_{\mathrm{np}}$) of Equation~\ref{eqn:gb_solv} is calculated -directly from the Born radius of each atom using a simple ACE type -approximation by Schaefer {\em et al.}~\cite{Karplus98}, including a -simple loop over all atoms. -This requires only one extra solvation parameter, independent of atom type, -but differing slightly between the three Born radii models. - % LocalWords: GROningen MAchine BIOSON Groningen GROMACS Berendsen der Spoel % LocalWords: Drunen Comp Phys Comm ROck NS FFT pbc EM ifthenelse gmxlite ff % LocalWords: octahedra triclinic Ewald PME PPPM trjconv xy solvated diff --git a/docs/manual/forcefield.tex b/docs/manual/forcefield.tex index f50da64e64..0445b73d74 100644 --- a/docs/manual/forcefield.tex +++ b/docs/manual/forcefield.tex @@ -2891,7 +2891,7 @@ of the blocks. {\bf Note} that all {\gromacs} programs can read compressed \subsection{CHARMM\index{CHARMM force field}} \label{subsec:charmmff} -{\gromacs} supports the CHARMM force field for proteins~\cite{mackerell04, mackerell98}, lipids~\cite{feller00} and nucleic acids~\cite{foloppe00,Mac2000}. The protein parameters (and to some extent the lipid and nucleic acid parameters) were thoroughly tested -- both by comparing potential energies between the port and the standard parameter set in the CHARMM molecular simulation package, as well by how the protein force field behaves together with {\gromacs}-specific techniques such as virtual sites (enabling long time steps) and a fast implicit solvent recently implemented~\cite{Larsson10} -- and the details and results are presented in the paper by Bjelkmar et al.~\cite{Bjelkmar10}. The nucleic acid parameters, as well as the ones for HEME, were converted and tested by Michel Cuendet. +{\gromacs} supports the CHARMM force field for proteins~\cite{mackerell04, mackerell98}, lipids~\cite{feller00} and nucleic acids~\cite{foloppe00,Mac2000}. 
The protein parameters (and to some extent the lipid and nucleic acid parameters) were thoroughly tested -- both by comparing potential energies between the port and the standard parameter set in the CHARMM molecular simulation package, as well by how the protein force field behaves together with {\gromacs}-specific techniques such as virtual sites (enabling long time steps) recently implemented~\cite{Larsson10} -- and the details and results are presented in the paper by Bjelkmar et al.~\cite{Bjelkmar10}. The nucleic acid parameters, as well as the ones for HEME, were converted and tested by Michel Cuendet. When selecting the CHARMM force field in {\tt \normindex{pdb2gmx}} the default option is to use \normindex{CMAP} (for torsional correction map). To exclude CMAP, use {\tt -nocmap}. The basic form of the CMAP term implemented in {\gromacs} is a function of the $\phi$ and $\psi$ backbone torsion angles. This term is defined in the {\tt .rtp} file by a {\tt [ cmap ]} statement at the end of each residue supporting CMAP. The following five atom names define the two torsional angles. Atoms 1-4 define $\phi$, and atoms 2-5 define $\psi$. The corresponding atom types are then matched to the correct CMAP type in the {\tt cmap.itp} file that contains the correction maps. diff --git a/docs/manual/topology.tex b/docs/manual/topology.tex index 9336933c40..bfd0393b5a 100644 --- a/docs/manual/topology.tex +++ b/docs/manual/topology.tex @@ -557,37 +557,6 @@ to be excluded, is it more convenient and much more efficient to use energy monitor group exclusions (see \secref{groupconcept}). -\section{Implicit solvation parameters\index{implicit solvation parameters}} -Starting with {\gromacs} 4.5, implicit solvent is supported. 
A section in the -topology has been introduced to list those parameters: - -{\small -\begin{verbatim} -[ implicit_genborn_params ] -; Atomtype sar st pi gbr hct -NH1 0.155 1 1.028 0.17063 0.79 ; N -N 0.155 1 1 0.155 0.79 ; Proline backbone N -H 0.1 1 1 0.115 0.85 ; H -CT1 0.180 1 1.276 0.190 0.72 ; C -\end{verbatim}} - -In this example the atom type is listed first, followed by five -numbers, and a comment (following a semicolon). - -Values in columns 1-3 are not currently used. They pertain to more -elaborate surface area algorithms, the one from Qiu {\em et al.}~\cite{Still97} in -particular. Column 4 contains the atomic van der Waals radii, which are used -in computing the Born radii. The dielectric offset is specified in -the {\tt *.mdp} file, and gets subtracted from the input van der Waals radii for the different -Born radii methods, as described by Onufriev {\em et al.}~\cite{Case04}. Column 5 is the -scale factor for the HCT and OBC models. The values are taken from the Tinker implementation of -the HCT pairwise scaling method~\cite{Truhlar96}. This method has been modified such that the -scaling factors have been adjusted to minimize differences between analytical surface areas and -GB using the HCT algorithm. The scaling is further modified in that it is not applied pairwise -as proposed by Hawkins {\em et al.}~\cite{Truhlar96}, but on a per-atom (rather than a per-pair) -basis. - - \section{Constraint algorithms\index{constraint algorithms}} \label{sec:constraints} Constraints are defined in the {\tt [~constraints~]} section. @@ -1463,7 +1432,6 @@ its content. 
That content can be seen in #include "ffnonbonded.itp" #include "ffbonded.itp" -#include "gbsa.itp" \end{verbatim}} The two {\tt \#define} statements set up the conditions so that diff --git a/docs/user-guide/cutoff-schemes.rst b/docs/user-guide/cutoff-schemes.rst index 5253b8fcc0..e78c35d251 100644 --- a/docs/user-guide/cutoff-schemes.rst +++ b/docs/user-guide/cutoff-schemes.rst @@ -57,7 +57,6 @@ force-switch interactions yes yes switched potential yes yes switched forces yes yes non-periodic systems yes Z + walls -implicit solvent yes no free energy perturbed non-bondeds yes yes energy group contributions yes only on CPU energy group exclusions yes no diff --git a/docs/user-guide/mdp-options.rst b/docs/user-guide/mdp-options.rst index 66d210a56d..096cb4cbb6 100644 --- a/docs/user-guide/mdp-options.rst +++ b/docs/user-guide/mdp-options.rst @@ -3102,100 +3102,6 @@ Mixed quantum/classical molecular dynamics CASSCF method. -Implicit solvent -^^^^^^^^^^^^^^^^ - -.. mdp:: implicit-solvent - - .. mdp-value:: no - - No implicit solvent - - .. mdp-value:: GBSA - - Do a simulation with implicit solvent using the Generalized Born - formalism. Three different methods for calculating the Born - radii are available, Still, HCT and OBC. These are specified - with the :mdp:`gb-algorithm` field. The non-polar solvation is - specified with the :mdp:`sa-algorithm` field. - -.. mdp:: gb-algorithm - - .. mdp-value:: Still - - Use the Still method to calculate the Born radii - - .. mdp-value:: HCT - - Use the Hawkins-Cramer-Truhlar method to calculate the Born - radii - - .. mdp-value:: OBC - - Use the Onufriev-Bashford-Case method to calculate the Born - radii - -.. mdp:: nstgbradii - - (1) \[steps\] - Frequency to (re)-calculate the Born radii. For most practial - purposes, setting a value larger than 1 violates energy - conservation and leads to unstable trajectories. - -.. mdp:: rgbradii - - (1.0) \[nm\] - Cut-off for the calculation of the Born radii. 
Currently must be - equal to rlist - -.. mdp:: gb-epsilon-solvent - - (80) - Dielectric constant for the implicit solvent - -.. mdp:: gb-saltconc - - (0) \[M\] - Salt concentration for implicit solvent models, currently not used - -.. mdp:: gb-obc-alpha -.. mdp:: gb-obc-beta -.. mdp:: gb-obc-gamma - - Scale factors for the OBC model. Default values of 1, 0.78 and 4.85 - respectively are for OBC(II). Values for OBC(I) are 0.8, 0 and 2.91 - respectively - -.. mdp:: gb-dielectric-offset - - (0.009) \[nm\] - Distance for the di-electric offset when calculating the Born - radii. This is the offset between the center of each atom the - center of the polarization energy for the corresponding atom - -.. mdp:: sa-algorithm - - .. mdp-value:: Ace-approximation - - Use an Ace-type approximation - - .. mdp-value:: None - - No non-polar solvation calculation done. For GBSA only the polar - part gets calculated - -.. mdp:: sa-surface-tension - - \[kJ mol-1 nm-2\] - Default value for surface tension with SA algorithms. The default - value is -1; Note that if this default value is not changed it will - be overridden by :ref:`gmx grompp` using values that are specific - for the choice of radii algorithm (0.0049 kcal/mol/Angstrom^2 for - Still, 0.0054 kcal/mol/Angstrom2 for HCT/OBC) Setting it to 0 will - while using an sa-algorithm other than None means no non-polar - calculations are done. - - Computational Electrophysiology ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Use these options to switch on and control ion/water position exchanges in "Computational @@ -3344,7 +3250,7 @@ User defined thingies Removed features ^^^^^^^^^^^^^^^^ -This feature has been removed from |Gromacs|, but so that old +These features have been removed from |Gromacs|, but so that old :ref:`mdp` and :ref:`tpr` files cannot be mistakenly misused, we still parse this option. :ref:`gmx grompp` and :ref:`gmx mdrun` will issue a fatal error if this is set. @@ -3353,4 +3259,8 @@ fatal error if this is set. (no) +.. 
mdp:: implicit-solvent + + (no) + .. _reference manual: gmx-manual-parent-dir_ diff --git a/docs/user-guide/mdrun-performance.rst b/docs/user-guide/mdrun-performance.rst index f460f6f4d6..859fc5f068 100644 --- a/docs/user-guide/mdrun-performance.rst +++ b/docs/user-guide/mdrun-performance.rst @@ -577,7 +577,6 @@ The performance counters are: * Neighbor search * Launch GPU operations * Communication of coordinates -* Born radii * Force * Waiting + Communication of force * Particle mesh Ewald diff --git a/docs/user-guide/terminology.rst b/docs/user-guide/terminology.rst index e5ad730331..29eae4f5ef 100644 --- a/docs/user-guide/terminology.rst +++ b/docs/user-guide/terminology.rst @@ -333,7 +333,7 @@ when addressing such a scenario: to a breakdown in the model physics, even if the starting configuration of the system is reasonable. -If using implicit solvation, starting your equilibration with a smaller time +When using no explicit solvent, starting your equilibration with a smaller time step than your production run can help energy equipartition more stably. 
There are several common situations in which instability frequently arises, diff --git a/src/gromacs/domdec/domdec.cpp b/src/gromacs/domdec/domdec.cpp index 33745e1be4..bb90e31364 100644 --- a/src/gromacs/domdec/domdec.cpp +++ b/src/gromacs/domdec/domdec.cpp @@ -67,7 +67,6 @@ #include "gromacs/mdlib/constr.h" #include "gromacs/mdlib/force.h" #include "gromacs/mdlib/forcerec.h" -#include "gromacs/mdlib/genborn.h" #include "gromacs/mdlib/gmx_omp_nthreads.h" #include "gromacs/mdlib/mdatoms.h" #include "gromacs/mdlib/mdrun.h" @@ -9755,11 +9754,6 @@ void dd_partition_system(FILE *fplog, mdAlgorithmsSetupAtomData(cr, ir, top_global, top_local, fr, nullptr, mdAtoms, vsite, nullptr); - if (ir->implicit_solvent) - { - make_local_gb(cr, fr->born, ir->gb_algorithm); - } - auto mdatoms = mdAtoms->mdatoms(); if (!thisRankHasDuty(cr, DUTY_PME)) { diff --git a/src/gromacs/fileio/tpxio.cpp b/src/gromacs/fileio/tpxio.cpp index 6e3db2cd43..013718e7d4 100644 --- a/src/gromacs/fileio/tpxio.cpp +++ b/src/gromacs/fileio/tpxio.cpp @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -118,6 +118,7 @@ enum tpxv { tpxv_PullExternalPotential, /**< Added pull type external potential */ tpxv_GenericParamsForElectricField, /**< Introduced KeyValueTree and moved electric field parameters */ tpxv_AcceleratedWeightHistogram, /**< sampling with accelerated weight histogram method (AWH) */ + tpxv_RemoveImplicitSolvation, /**< removed support for implicit solvation */ tpxv_Count /**< the total number of tpxv versions */ }; @@ -164,9 +165,18 @@ typedef struct { } t_ftupd; /* + * TODO The following three lines make little sense, please clarify if + * you've had to work out how ftupd works. + * * The entries should be ordered in: * 1. ascending function type number * 2. ascending file version number + * + * Because we support reading of old .tpr file versions (even when + * mdrun can no longer run the simulation), we need to be able to read + * obsolete t_interaction_function types. Any data read from such + * fields is discarded. Their names have _NOLONGERUSED appended to + * them to make things clear. 
*/ static const t_ftupd ftupd[] = { { 34, F_FENEBONDS }, @@ -181,11 +191,11 @@ static const t_ftupd ftupd[] = { { tpxv_RestrictedBendingAndCombinedAngleTorsionPotentials, F_CBTDIHS }, { 43, F_TABDIHS }, { 65, F_CMAP }, - { 60, F_GB12 }, - { 61, F_GB13 }, - { 61, F_GB14 }, - { 72, F_GBPOL }, - { 72, F_NPSOLVATION }, + { 60, F_GB12_NOLONGERUSED }, + { 61, F_GB13_NOLONGERUSED }, + { 61, F_GB14_NOLONGERUSED }, + { 72, F_GBPOL_NOLONGERUSED }, + { 72, F_NPSOLVATION_NOLONGERUSED }, { 41, F_LJC14_Q }, { 41, F_LJC_PAIRS_NB }, { 32, F_BHAM_LR_NOLONGERUSED }, @@ -1268,54 +1278,38 @@ static void do_inputrec(t_fileio *fio, t_inputrec *ir, gmx_bool bRead, } gmx_fio_do_real(fio, ir->tabext); - gmx_fio_do_int(fio, ir->gb_algorithm); - gmx_fio_do_int(fio, ir->nstgbradii); - gmx_fio_do_real(fio, ir->rgbradii); - gmx_fio_do_real(fio, ir->gb_saltconc); - gmx_fio_do_int(fio, ir->implicit_solvent); - if (file_version >= 55) + // This permits reading a .tpr file that used implicit solvent, + // and later permitting mdrun to refuse to run it. 
+ if (bRead) { - gmx_fio_do_real(fio, ir->gb_epsilon_solvent); - gmx_fio_do_real(fio, ir->gb_obc_alpha); - gmx_fio_do_real(fio, ir->gb_obc_beta); - gmx_fio_do_real(fio, ir->gb_obc_gamma); - if (file_version >= 60) + if (file_version < tpxv_RemoveImplicitSolvation) { - gmx_fio_do_real(fio, ir->gb_dielectric_offset); - gmx_fio_do_int(fio, ir->sa_algorithm); + gmx_fio_do_int(fio, idum); + gmx_fio_do_int(fio, idum); + gmx_fio_do_real(fio, rdum); + gmx_fio_do_real(fio, rdum); + gmx_fio_do_int(fio, idum); + ir->implicit_solvent = (idum > 0); } else { - ir->gb_dielectric_offset = 0.009; - ir->sa_algorithm = esaAPPROX; + ir->implicit_solvent = false; } - gmx_fio_do_real(fio, ir->sa_surface_tension); - - /* Override sa_surface_tension if it is not changed in the mpd-file */ - if (ir->sa_surface_tension < 0) + if (file_version >= 55 && file_version < tpxv_RemoveImplicitSolvation) { - if (ir->gb_algorithm == egbSTILL) - { - ir->sa_surface_tension = 0.0049 * 100 * CAL2JOULE; - } - else if (ir->gb_algorithm == egbHCT || ir->gb_algorithm == egbOBC) + gmx_fio_do_real(fio, rdum); + gmx_fio_do_real(fio, rdum); + gmx_fio_do_real(fio, rdum); + gmx_fio_do_real(fio, rdum); + if (file_version >= 60) { - ir->sa_surface_tension = 0.0054 * 100 * CAL2JOULE; + gmx_fio_do_real(fio, rdum); + gmx_fio_do_int(fio, idum); } + gmx_fio_do_real(fio, rdum); } - - } - else - { - /* Better use sensible values than insane (0.0) ones... 
*/ - ir->gb_epsilon_solvent = 80; - ir->gb_obc_alpha = 1.0; - ir->gb_obc_beta = 0.8; - ir->gb_obc_gamma = 4.85; - ir->sa_surface_tension = 2.092; } - if (file_version >= 81) { gmx_fio_do_real(fio, ir->fourier_spacing); @@ -2119,22 +2113,28 @@ static void do_iparams(t_fileio *fio, t_functype ftype, t_iparams *iparams, gmx_fio_do_int(fio, iparams->vsiten.n); gmx_fio_do_real(fio, iparams->vsiten.a); break; - case F_GB12: - case F_GB13: - case F_GB14: - /* We got rid of some parameters in version 68 */ - if (bRead && file_version < 68) + case F_GB12_NOLONGERUSED: + case F_GB13_NOLONGERUSED: + case F_GB14_NOLONGERUSED: + // Implicit solvent parameters can still be read, but never used + if (bRead) { - gmx_fio_do_real(fio, rdum); - gmx_fio_do_real(fio, rdum); - gmx_fio_do_real(fio, rdum); - gmx_fio_do_real(fio, rdum); + if (file_version < 68) + { + gmx_fio_do_real(fio, rdum); + gmx_fio_do_real(fio, rdum); + gmx_fio_do_real(fio, rdum); + gmx_fio_do_real(fio, rdum); + } + if (file_version < tpxv_RemoveImplicitSolvation) + { + gmx_fio_do_real(fio, rdum); + gmx_fio_do_real(fio, rdum); + gmx_fio_do_real(fio, rdum); + gmx_fio_do_real(fio, rdum); + gmx_fio_do_real(fio, rdum); + } } - gmx_fio_do_real(fio, iparams->gb.sar); - gmx_fio_do_real(fio, iparams->gb.st); - gmx_fio_do_real(fio, iparams->gb.pi); - gmx_fio_do_real(fio, iparams->gb.gbr); - gmx_fio_do_real(fio, iparams->gb.bmlt); break; case F_CMAP: gmx_fio_do_int(fio, iparams->cmap.cmapA); @@ -2631,24 +2631,24 @@ static void do_atomtypes(t_fileio *fio, t_atomtypes *atomtypes, gmx_bool bRead, j = atomtypes->nr; if (bRead) { - snew(atomtypes->radius, j); - snew(atomtypes->vol, j); - snew(atomtypes->surftens, j); snew(atomtypes->atomnumber, j); - snew(atomtypes->gb_radius, j); - snew(atomtypes->S_hct, j); } - gmx_fio_ndo_real(fio, atomtypes->radius, j); - gmx_fio_ndo_real(fio, atomtypes->vol, j); - gmx_fio_ndo_real(fio, atomtypes->surftens, j); + if (bRead && file_version < tpxv_RemoveImplicitSolvation) + { + std::vector 
dummy(atomtypes->nr, 0); + gmx_fio_ndo_real(fio, dummy.data(), dummy.size()); + gmx_fio_ndo_real(fio, dummy.data(), dummy.size()); + gmx_fio_ndo_real(fio, dummy.data(), dummy.size()); + } if (file_version >= 40) { gmx_fio_ndo_int(fio, atomtypes->atomnumber, j); } - if (file_version >= 60) + if (bRead && file_version >= 60 && file_version < tpxv_RemoveImplicitSolvation) { - gmx_fio_ndo_real(fio, atomtypes->gb_radius, j); - gmx_fio_ndo_real(fio, atomtypes->S_hct, j); + std::vector dummy(atomtypes->nr, 0); + gmx_fio_ndo_real(fio, dummy.data(), dummy.size()); + gmx_fio_ndo_real(fio, dummy.data(), dummy.size()); } } diff --git a/src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp b/src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp index 381e49c69f..a3c495c885 100644 --- a/src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp +++ b/src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -568,10 +568,6 @@ gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict nlist, FscalC[i] = -qq[i]*tabscale*FF*rC; break; - case GMX_NBKERNEL_ELEC_GENERALIZEDBORN: - gmx_fatal(FARGS, "Free energy and GB not implemented.\n"); - break; - case GMX_NBKERNEL_ELEC_EWALD: if (bConvertEwaldToCoulomb) { diff --git a/src/gromacs/gmxlib/nonbonded/nb_generic.cpp b/src/gromacs/gmxlib/nonbonded/nb_generic.cpp index 499b9f5d2d..b9ca9c7753 100644 --- a/src/gromacs/gmxlib/nonbonded/nb_generic.cpp +++ b/src/gromacs/gmxlib/nonbonded/nb_generic.cpp @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2012,2014,2015,2017, by the GROMACS development team, led by + * Copyright (c) 2012,2014,2015,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -293,11 +293,6 @@ gmx_nb_generic_kernel(t_nblist * nlist, felec = -qq*FF*tabscale*rinv; break; - case GMX_NBKERNEL_ELEC_GENERALIZEDBORN: - /* GB */ - gmx_fatal(FARGS, "Death & horror! 
GB generic interaction not implemented.\n"); - break; - case GMX_NBKERNEL_ELEC_EWALD: ewrt = rsq*rinv*ewtabscale; ewitab = ewrt; diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel.h b/src/gromacs/gmxlib/nonbonded/nb_kernel.h index d4f7ca31df..ba297da51e 100644 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel.h +++ b/src/gromacs/gmxlib/nonbonded/nb_kernel.h @@ -70,7 +70,6 @@ typedef struct /* potentials */ real * energygrp_elec; real * energygrp_vdw; - real * energygrp_polarization; } nb_kernel_data_t; diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/make_nb_kernel_avx_128_fma_double.py b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/make_nb_kernel_avx_128_fma_double.py index 6c87a80af0..a0dd91f741 100755 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/make_nb_kernel_avx_128_fma_double.py +++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/make_nb_kernel_avx_128_fma_double.py @@ -2,7 +2,7 @@ # # This file is part of the GROMACS molecular simulation package. # -# Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by +# Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, # and including many others, as listed in the AUTHORS file in the # top-level source directory and at http://www.gromacs.org. 
@@ -105,7 +105,6 @@ ElectrostaticsList = { 'None' : [], 'Coulomb' : ['rinv','rinvsq'], 'ReactionField' : ['rinv','rinvsq'], - 'GeneralizedBorn' : ['rinv','r'], 'CubicSplineTable' : ['rinv','r','table'], 'Ewald' : ['rinv','rinvsq','r'], } @@ -190,7 +189,6 @@ Abbreviation = { 'Coulomb' : 'Coul', 'Ewald' : 'Ew', 'ReactionField' : 'RF', - 'GeneralizedBorn' : 'GB', 'CubicSplineTable' : 'CSTab', 'LennardJones' : 'LJ', 'Buckingham' : 'Bham', @@ -269,7 +267,7 @@ def KeepKernel(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,Kernel return 0 # No need for LJ-only water optimization, or water optimization with implicit solvent. - if('Water' in KernelGeom[0] and (KernelElec=='None' or 'GeneralizedBorn' in KernelElec)): + if('Water' in KernelGeom[0] and KernelElec=='None'): return 0 # Non-matching table settings are pointless diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_128_fma_double.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_128_fma_double.c deleted file mode 100644 index ac8fc85251..0000000000 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_128_fma_double.c +++ /dev/null @@ -1,858 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. 
- * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/* - * Note: this file was generated by the GROMACS avx_128_fma_double kernel generator. 
- */ -#include "gmxpre.h" - -#include "config.h" - -#include - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_x86_avx_128_fma_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_128_fma_double - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: CubicSplineTable - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_128_fma_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - __m128d tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128d ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - __m128d jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128d dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128d velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128d vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp; - __m128d minushalf = _mm_set1_pd(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - __m128d rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - __m128d one_sixth = _mm_set1_pd(1.0/6.0); - __m128d one_twelfth = _mm_set1_pd(1.0/12.0); - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128d rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - __m128d dummy_mask,cutoff_mask; - __m128d signbit = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) ); - __m128d one = _mm_set1_pd(1.0); - __m128d two = _mm_set1_pd(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_pd(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = _mm_set1_pd(kernel_data->table_vdw->scale); - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_pd(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = 
_mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_mm_update_1pot_pd(vgbsum,kernel_data->energygrp_polarization+ggid); - gmx_mm_update_1pot_pd(vvdwsum,kernel_data->energygrp_vdw+ggid); - dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai0,isai0)); - gmx_mm_update_1pot_pd(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 10 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*95); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_128_fma_double - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: CubicSplineTable - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_128_fma_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - __m128d tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128d ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - __m128d jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128d dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128d velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128d vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp; - __m128d minushalf = _mm_set1_pd(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - __m128d rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - __m128d one_sixth = _mm_set1_pd(1.0/6.0); - __m128d one_twelfth = _mm_set1_pd(1.0/12.0); - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128d rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - __m128d dummy_mask,cutoff_mask; - __m128d signbit = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) ); - __m128d one = _mm_set1_pd(1.0); - __m128d two = _mm_set1_pd(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_pd(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = _mm_set1_pd(kernel_data->table_vdw->scale); - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_pd(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = 
_mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_x86_avx_128_fma_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_128_fma_double - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_128_fma_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - __m128d tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128d ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - __m128d jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128d dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128d velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128d vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp; - __m128d minushalf = _mm_set1_pd(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - __m128d rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - __m128d one_sixth = _mm_set1_pd(1.0/6.0); - __m128d one_twelfth = _mm_set1_pd(1.0/12.0); - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128d rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - __m128d dummy_mask,cutoff_mask; - __m128d signbit = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) ); - __m128d one = _mm_set1_pd(1.0); - __m128d two = _mm_set1_pd(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_pd(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_pd(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; 
- j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_mm_update_1pot_pd(vgbsum,kernel_data->energygrp_polarization+ggid); - gmx_mm_update_1pot_pd(vvdwsum,kernel_data->energygrp_vdw+ggid); - dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai0,isai0)); - gmx_mm_update_1pot_pd(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 10 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*74); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_128_fma_double - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_128_fma_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - __m128d tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128d ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - __m128d jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128d dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128d velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128d vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp; - __m128d minushalf = _mm_set1_pd(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - __m128d rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - __m128d one_sixth = _mm_set1_pd(1.0/6.0); - __m128d one_twelfth = _mm_set1_pd(1.0/12.0); - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128d rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - __m128d dummy_mask,cutoff_mask; - __m128d signbit = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) ); - __m128d one = _mm_set1_pd(1.0); - __m128d two = _mm_set1_pd(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_pd(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_pd(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; 
- j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_x86_avx_128_fma_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_128_fma_double - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_128_fma_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - __m128d tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128d ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - __m128d jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128d dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128d velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128d vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp; - __m128d minushalf = _mm_set1_pd(-0.5); - real *invsqrta,*dvda,*gbtab; - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128d rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - __m128d dummy_mask,cutoff_mask; - __m128d signbit = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) ); - __m128d one = _mm_set1_pd(1.0); - __m128d two = _mm_set1_pd(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_pd(fr->ic->epsfac); - charge = mdatoms->chargeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_pd(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_mm_update_1pot_pd(vgbsum,kernel_data->energygrp_polarization+ggid); - dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai0,isai0)); 
- gmx_mm_update_1pot_pd(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 9 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*9 + inneriter*61); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_128_fma_double - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_128_fma_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - __m128d tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128d ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - __m128d jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128d dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128d velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128d vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp; - __m128d minushalf = _mm_set1_pd(-0.5); - real *invsqrta,*dvda,*gbtab; - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128d rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - __m128d dummy_mask,cutoff_mask; - __m128d signbit = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) ); - __m128d one = _mm_set1_pd(1.0); - __m128d two = _mm_set1_pd(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_pd(fr->ic->epsfac); - charge = mdatoms->chargeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_pd(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxinvsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_pd(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_pd((1.0/fr->ic->epsilon_r) 
- (1.0/fr->gb_epsilon_solvent)); - /* #endif */ - /* #if 'Water' in GEOMETRY_I */ /* Setup water-specific parameters */ inr = nlist->iinr[0]; @@ -360,9 +346,6 @@ void /* #for I in PARTICLES_ELEC_I */ iq{I} = _mm_mul_pd(facel,_mm_load1_pd(charge+inr+{I})); /* #define OUTERFLOPS OUTERFLOPS+1 */ - /* #if KERNEL_ELEC=='GeneralizedBorn' */ - isai{I} = _mm_load1_pd(invsqrta+inr+{I}); - /* #endif */ /* #endfor */ /* #for I in PARTICLES_VDW_I */ vdwioffset{I} = 2*nvdwtype*vdwtype[inr+{I}]; @@ -374,16 +357,10 @@ void /* #if KERNEL_ELEC != 'None' */ velecsum = _mm_setzero_pd(); /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC */ - vgbsum = _mm_setzero_pd(); - /* #endif */ /* #if KERNEL_VDW != 'None' */ vvdwsum = _mm_setzero_pd(); /* #endif */ /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */ - dvdasum = _mm_setzero_pd(); - /* #endif */ /* #for ROUND in ['Loop','Epilogue'] */ @@ -490,13 +467,6 @@ void /* #else */ jq{J} = _mm_load_sd(charge+jnrA+{J}); /* #endif */ - /* #if KERNEL_ELEC=='GeneralizedBorn' */ - /* #if ROUND =='Loop' */ - isaj{J} = gmx_mm_load_2real_swizzle_pd(invsqrta+jnrA+{J},invsqrta+jnrB+{J}); - /* #else */ - isaj{J} = _mm_load_sd(invsqrta+jnrA+{J}); - /* #endif */ - /* #endif */ /* #endfor */ /* #for J in PARTICLES_VDW_J */ vdwjidx{J}A = 2*vdwtype[jnrA+{J}]; @@ -607,68 +577,6 @@ void /* #define INNERFLOPS INNERFLOPS+3 */ /* #endif */ - /* #elif KERNEL_ELEC=='GeneralizedBorn' */ - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_pd(isai{I},isaj{J}); - gbqqfactor = _mm_xor_pd(signbit,_mm_mul_pd(qq{I}{J},_mm_mul_pd(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_pd(isaprod,gbtabscale); - /* #define INNERFLOPS INNERFLOPS+5 */ - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. 
- */ - rt = _mm_mul_pd(r{I}{J},gbscale); - gbitab = _mm_cvttpd_epi32(rt); -#ifdef __XOP__ - gbeps = _mm_frcz_pd(rt); -#else - gbeps = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR)); -#endif - gbitab = _mm_slli_epi32(gbitab,2); - - Y = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,0) ); - /* #if ROUND == 'Loop' */ - F = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,1) ); - /* #else */ - F = _mm_setzero_pd(); - /* #endif */ - GMX_MM_TRANSPOSE2_PD(Y,F); - G = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,0) +2); - /* #if ROUND == 'Loop' */ - H = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,1) +2); - /* #else */ - H = _mm_setzero_pd(); - /* #endif */ - GMX_MM_TRANSPOSE2_PD(G,H); - Fp = _mm_macc_pd(gbeps,_mm_macc_pd(gbeps,H,G),F); - VV = _mm_macc_pd(gbeps,Fp,Y); - vgb = _mm_mul_pd(gbqqfactor,VV); - /* #define INNERFLOPS INNERFLOPS+10 */ - - /* #if 'Force' in KERNEL_VF */ - twogbeps = _mm_add_pd(gbeps,gbeps); - FF = _mm_macc_pd(_mm_macc_pd(twogbeps,H,G),gbeps,Fp); - fgb = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale)); - dvdatmp = _mm_mul_pd(minushalf,_mm_macc_pd(fgb,r{I}{J},vgb)); - /* #if ROUND == 'Epilogue' */ - dvdatmp = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd()); - /* #endif */ - dvdasum = _mm_add_pd(dvdasum,dvdatmp); - /* #if ROUND == 'Loop' */ - gmx_mm_increment_2real_swizzle_pd(dvda+jnrA,dvda+jnrB,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj{J},isaj{J}))); - /* #else */ - gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj{J},isaj{J}))); - /* #endif */ - /* #define INNERFLOPS INNERFLOPS+13 */ - /* #endif */ - velec = _mm_mul_pd(qq{I}{J},rinv{I}{J}); - /* #define INNERFLOPS INNERFLOPS+1 */ - /* #if 'Force' in KERNEL_VF */ - felec = _mm_mul_pd(_mm_msub_pd(velec,rinv{I}{J},fgb),rinv{I}{J}); - /* #define INNERFLOPS INNERFLOPS+3 */ - /* #endif */ - /* #elif KERNEL_ELEC=='Ewald' */ /* EWALD ELECTROSTATICS */ @@ -955,17 +863,6 @@ void /* #endif */ velecsum = _mm_add_pd(velecsum,velec); /* #define INNERFLOPS INNERFLOPS+1 */ - /* #if 
KERNEL_ELEC=='GeneralizedBorn' */ - /* #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */ - vgb = _mm_and_pd(vgb,cutoff_mask); - /* #define INNERFLOPS INNERFLOPS+1 */ - /* #endif */ - /* #if ROUND == 'Epilogue' */ - vgb = _mm_unpacklo_pd(vgb,_mm_setzero_pd()); - /* #endif */ - vgbsum = _mm_add_pd(vgbsum,vgb); - /* #define INNERFLOPS INNERFLOPS+1 */ - /* #endif */ /* #endif */ /* #if 'vdw' in INTERACTION_FLAGS[I][J] */ /* #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */ @@ -1110,19 +1007,11 @@ void gmx_mm_update_1pot_pd(velecsum,kernel_data->energygrp_elec+ggid); /* #define OUTERFLOPS OUTERFLOPS+1 */ /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC */ - gmx_mm_update_1pot_pd(vgbsum,kernel_data->energygrp_polarization+ggid); - /* #define OUTERFLOPS OUTERFLOPS+1 */ - /* #endif */ /* #if KERNEL_VDW != 'None' */ gmx_mm_update_1pot_pd(vvdwsum,kernel_data->energygrp_vdw+ggid); /* #define OUTERFLOPS OUTERFLOPS+1 */ /* #endif */ /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */ - dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai{I},isai{I})); - gmx_mm_update_1pot_pd(dvdasum,dvda+inr); - /* #endif */ /* Increment number of inner iterations */ inneriter += j_index_end - j_index_start; diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/make_nb_kernel_avx_128_fma_single.py b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/make_nb_kernel_avx_128_fma_single.py index 7f264cad92..a80e244a54 100755 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/make_nb_kernel_avx_128_fma_single.py +++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/make_nb_kernel_avx_128_fma_single.py @@ -2,7 +2,7 @@ # # This file is part of the GROMACS molecular simulation package. 
# -# Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by +# Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, # and including many others, as listed in the AUTHORS file in the # top-level source directory and at http://www.gromacs.org. @@ -105,7 +105,6 @@ ElectrostaticsList = { 'None' : [], 'Coulomb' : ['rinv','rinvsq'], 'ReactionField' : ['rinv','rinvsq'], - 'GeneralizedBorn' : ['rinv','r'], 'CubicSplineTable' : ['rinv','r','table'], 'Ewald' : ['rinv','rinvsq','r'], } @@ -190,7 +189,6 @@ Abbreviation = { 'Coulomb' : 'Coul', 'Ewald' : 'Ew', 'ReactionField' : 'RF', - 'GeneralizedBorn' : 'GB', 'CubicSplineTable' : 'CSTab', 'LennardJones' : 'LJ', 'Buckingham' : 'Bham', @@ -269,7 +267,7 @@ def KeepKernel(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,Kernel return 0 # No need for LJ-only water optimization, or water optimization with implicit solvent. - if('Water' in KernelGeom[0] and (KernelElec=='None' or 'GeneralizedBorn' in KernelElec)): + if('Water' in KernelGeom[0] and KernelElec=='None'): return 0 # Non-matching table settings are pointless diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_128_fma_single.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_128_fma_single.c deleted file mode 100644 index 1e7160bc96..0000000000 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_128_fma_single.c +++ /dev/null @@ -1,978 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. 
- * - * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/* - * Note: this file was generated by the GROMACS avx_128_fma_single kernel generator. 
- */ -#include "gmxpre.h" - -#include "config.h" - -#include - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_x86_avx_128_fma_single.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_128_fma_single - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: CubicSplineTable - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_128_fma_single - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B,C,D refer to j loop unrolling done with AVX_128, e.g. for the four different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB,jnrC,jnrD; - int jnrlistA,jnrlistB,jnrlistC,jnrlistD; - int j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - real *fjptrA,*fjptrB,*fjptrC,*fjptrD; - real scratch[4*DIM]; - __m128 fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D; - __m128 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,twogbeps,dvdatmp; - __m128 minushalf = _mm_set1_ps(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - __m128 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - __m128 one_sixth = _mm_set1_ps(1.0/6.0); - __m128 one_twelfth = _mm_set1_ps(1.0/12.0); - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128 rt,vfeps,twovfeps,vftabscale,Y,F,G,H,Fp,VV,FF; - real *vftab; - __m128 dummy_mask,cutoff_mask; - __m128 signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) ); - __m128 one = _mm_set1_ps(1.0); - __m128 two = _mm_set1_ps(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_ps(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = _mm_set1_ps(kernel_data->table_vdw->scale); - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = 
_mm_set1_ps(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = jnrC = jnrD = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - j_coord_offsetC = 0; - j_coord_offsetD = 0; - - outeriter = 0; - inneriter = 0; - - for(iidx=0;iidx<4*DIM;iidx++) - { - scratch[iidx] = 0.0; - } - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx=0; jidx+=4) - { - - /* Get j neighbor index, and coordinate index */ - jnrA = jjnr[jidx]; - jnrB = jjnr[jidx+1]; - jnrC = jjnr[jidx+2]; - jnrD = jjnr[jidx+3]; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = avx128fma_invsqrt_f(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A, - vdwparam+vdwioffset0+vdwjidx0B, - vdwparam+vdwioffset0+vdwjidx0C, - vdwparam+vdwioffset0+vdwjidx0D, - &c6_00,&c12_00); - - /* 
Calculate table index by multiplying r with table scale and truncate to integer */ - rt = _mm_mul_ps(r00,vftabscale); - vfitab = _mm_cvttps_epi32(rt); -#ifdef __XOP__ - vfeps = _mm_frcz_ps(rt); -#else - vfeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); -#endif - twovfeps = _mm_add_ps(vfeps,vfeps); - vfitab = _mm_slli_epi32(vfitab,3); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); -#ifdef __XOP__ - gbeps = _mm_frcz_ps(rt); -#else - gbeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); -#endif - gbitab = _mm_slli_epi32(gbitab,2); - - Y = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Fp = _mm_macc_ps(gbeps,_mm_macc_ps(gbeps,H,G),F); - VV = _mm_macc_ps(gbeps,Fp,Y); - vgb = _mm_mul_ps(gbqqfactor,VV); - - twogbeps = _mm_add_ps(gbeps,gbeps); - FF = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r00,vgb)); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_msub_ps(velec,rinv00,fgb),rinv00); - - /* CUBIC SPLINE TABLE DISPERSION */ - Y = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,0) 
); - F = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,1) ); - G = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,2) ); - H = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Fp = _mm_macc_ps(vfeps,_mm_macc_ps(H,vfeps,G),F); - VV = _mm_macc_ps(vfeps,Fp,Y); - vvdw6 = _mm_mul_ps(c6_00,VV); - FF = _mm_macc_ps(vfeps,_mm_macc_ps(twovfeps,H,G),Fp); - fvdw6 = _mm_mul_ps(c6_00,FF); - - /* CUBIC SPLINE TABLE REPULSION */ - vfitab = _mm_add_epi32(vfitab,ifour); - Y = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,0) ); - F = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,1) ); - G = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,2) ); - H = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Fp = _mm_macc_ps(vfeps,_mm_macc_ps(H,vfeps,G),F); - VV = _mm_macc_ps(vfeps,Fp,Y); - vvdw12 = _mm_mul_ps(c12_00,VV); - FF = _mm_macc_ps(vfeps,_mm_macc_ps(twovfeps,H,G),Fp); - fvdw12 = _mm_mul_ps(c12_00,FF); - vvdw = _mm_add_ps(vvdw12,vvdw6); - fvdw = _mm_xor_ps(signbit,_mm_mul_ps(_mm_add_ps(fvdw6,fvdw12),_mm_mul_ps(vftabscale,rinv00))); - - /* Update potential sum for this i atom from the interaction with this j atom. */ - velecsum = _mm_add_ps(velecsum,velec); - vgbsum = _mm_add_ps(vgbsum,vgb); - vvdwsum = _mm_add_ps(vvdwsum,vvdw); - - fscal = _mm_add_ps(felec,fvdw); - - /* Update vectorial force */ - fix0 = _mm_macc_ps(dx00,fscal,fix0); - fiy0 = _mm_macc_ps(dy00,fscal,fiy0); - fiz0 = _mm_macc_ps(dz00,fscal,fiz0); - - fjptrA = f+j_coord_offsetA; - fjptrB = f+j_coord_offsetB; - fjptrC = f+j_coord_offsetC; - fjptrD = f+j_coord_offsetD; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD, - _mm_mul_ps(dx00,fscal), - _mm_mul_ps(dy00,fscal), - _mm_mul_ps(dz00,fscal)); - - /* Inner loop uses 95 flops */ - } - - if(jidx=0) ? jnrlistA : 0; - jnrB = (jnrlistB>=0) ? jnrlistB : 0; - jnrC = (jnrlistC>=0) ? jnrlistC : 0; - jnrD = (jnrlistD>=0) ? 
jnrlistD : 0; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = avx128fma_invsqrt_f(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - r00 = _mm_andnot_ps(dummy_mask,r00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A, - vdwparam+vdwioffset0+vdwjidx0B, - vdwparam+vdwioffset0+vdwjidx0C, - vdwparam+vdwioffset0+vdwjidx0D, - &c6_00,&c12_00); - - /* Calculate table index by multiplying r with table scale and truncate to integer */ - rt = _mm_mul_ps(r00,vftabscale); - vfitab = _mm_cvttps_epi32(rt); -#ifdef __XOP__ - vfeps = _mm_frcz_ps(rt); -#else - vfeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); -#endif - twovfeps = _mm_add_ps(vfeps,vfeps); - vfitab = _mm_slli_epi32(vfitab,3); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate 
generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); -#ifdef __XOP__ - gbeps = _mm_frcz_ps(rt); -#else - gbeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); -#endif - gbitab = _mm_slli_epi32(gbitab,2); - - Y = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Fp = _mm_macc_ps(gbeps,_mm_macc_ps(gbeps,H,G),F); - VV = _mm_macc_ps(gbeps,Fp,Y); - vgb = _mm_mul_ps(gbqqfactor,VV); - - twogbeps = _mm_add_ps(gbeps,gbeps); - FF = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r00,vgb)); - dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? 
dvda+jnrD : scratch; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_msub_ps(velec,rinv00,fgb),rinv00); - - /* CUBIC SPLINE TABLE DISPERSION */ - Y = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,0) ); - F = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,1) ); - G = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,2) ); - H = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Fp = _mm_macc_ps(vfeps,_mm_macc_ps(H,vfeps,G),F); - VV = _mm_macc_ps(vfeps,Fp,Y); - vvdw6 = _mm_mul_ps(c6_00,VV); - FF = _mm_macc_ps(vfeps,_mm_macc_ps(twovfeps,H,G),Fp); - fvdw6 = _mm_mul_ps(c6_00,FF); - - /* CUBIC SPLINE TABLE REPULSION */ - vfitab = _mm_add_epi32(vfitab,ifour); - Y = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,0) ); - F = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,1) ); - G = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,2) ); - H = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Fp = _mm_macc_ps(vfeps,_mm_macc_ps(H,vfeps,G),F); - VV = _mm_macc_ps(vfeps,Fp,Y); - vvdw12 = _mm_mul_ps(c12_00,VV); - FF = _mm_macc_ps(vfeps,_mm_macc_ps(twovfeps,H,G),Fp); - fvdw12 = _mm_mul_ps(c12_00,FF); - vvdw = _mm_add_ps(vvdw12,vvdw6); - fvdw = _mm_xor_ps(signbit,_mm_mul_ps(_mm_add_ps(fvdw6,fvdw12),_mm_mul_ps(vftabscale,rinv00))); - - /* Update potential sum for this i atom from the interaction with this j atom. */ - velec = _mm_andnot_ps(dummy_mask,velec); - velecsum = _mm_add_ps(velecsum,velec); - vgb = _mm_andnot_ps(dummy_mask,vgb); - vgbsum = _mm_add_ps(vgbsum,vgb); - vvdw = _mm_andnot_ps(dummy_mask,vvdw); - vvdwsum = _mm_add_ps(vvdwsum,vvdw); - - fscal = _mm_add_ps(felec,fvdw); - - fscal = _mm_andnot_ps(dummy_mask,fscal); - - /* Update vectorial force */ - fix0 = _mm_macc_ps(dx00,fscal,fix0); - fiy0 = _mm_macc_ps(dy00,fscal,fiy0); - fiz0 = _mm_macc_ps(dz00,fscal,fiz0); - - fjptrA = (jnrlistA>=0) ? 
f+j_coord_offsetA : scratch; - fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch; - fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch; - fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD, - _mm_mul_ps(dx00,fscal), - _mm_mul_ps(dy00,fscal), - _mm_mul_ps(dz00,fscal)); - - /* Inner loop uses 96 flops */ - } - - /* End of innermost loop */ - - gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0, - f+i_coord_offset,fshift+i_shift_offset); - - ggid = gid[iidx]; - /* Update potential energies */ - gmx_mm_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid); - gmx_mm_update_1pot_ps(vgbsum,kernel_data->energygrp_polarization+ggid); - gmx_mm_update_1pot_ps(vvdwsum,kernel_data->energygrp_vdw+ggid); - dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0)); - gmx_mm_update_1pot_ps(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 10 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*96); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_128_fma_single - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: CubicSplineTable - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_128_fma_single - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B,C,D refer to j loop unrolling done with AVX_128, e.g. 
for the four different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB,jnrC,jnrD; - int jnrlistA,jnrlistB,jnrlistC,jnrlistD; - int j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - real *fjptrA,*fjptrB,*fjptrC,*fjptrD; - real scratch[4*DIM]; - __m128 fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D; - __m128 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,twogbeps,dvdatmp; - __m128 minushalf = _mm_set1_ps(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - __m128 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - __m128 one_sixth = _mm_set1_ps(1.0/6.0); - __m128 one_twelfth = _mm_set1_ps(1.0/12.0); - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128 rt,vfeps,twovfeps,vftabscale,Y,F,G,H,Fp,VV,FF; - real *vftab; - __m128 dummy_mask,cutoff_mask; - __m128 signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) ); - __m128 one = _mm_set1_ps(1.0); - __m128 two = _mm_set1_ps(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_ps(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = 
_mm_set1_ps(kernel_data->table_vdw->scale); - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_ps(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = jnrC = jnrD = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - j_coord_offsetC = 0; - j_coord_offsetD = 0; - - outeriter = 0; - inneriter = 0; - - for(iidx=0;iidx<4*DIM;iidx++) - { - scratch[iidx] = 0.0; - } - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx=0; jidx+=4) - { - - /* Get j neighbor index, and coordinate index */ - jnrA = jjnr[jidx]; - jnrB = jjnr[jidx+1]; - jnrC = jjnr[jidx+2]; - jnrD = jjnr[jidx+3]; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = avx128fma_invsqrt_f(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A, - 
vdwparam+vdwioffset0+vdwjidx0B, - vdwparam+vdwioffset0+vdwjidx0C, - vdwparam+vdwioffset0+vdwjidx0D, - &c6_00,&c12_00); - - /* Calculate table index by multiplying r with table scale and truncate to integer */ - rt = _mm_mul_ps(r00,vftabscale); - vfitab = _mm_cvttps_epi32(rt); -#ifdef __XOP__ - vfeps = _mm_frcz_ps(rt); -#else - vfeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); -#endif - twovfeps = _mm_add_ps(vfeps,vfeps); - vfitab = _mm_slli_epi32(vfitab,3); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); -#ifdef __XOP__ - gbeps = _mm_frcz_ps(rt); -#else - gbeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); -#endif - gbitab = _mm_slli_epi32(gbitab,2); - - Y = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Fp = _mm_macc_ps(gbeps,_mm_macc_ps(gbeps,H,G),F); - VV = _mm_macc_ps(gbeps,Fp,Y); - vgb = _mm_mul_ps(gbqqfactor,VV); - - twogbeps = _mm_add_ps(gbeps,gbeps); - FF = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r00,vgb)); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = 
_mm_mul_ps(_mm_msub_ps(velec,rinv00,fgb),rinv00); - - /* CUBIC SPLINE TABLE DISPERSION */ - Y = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,0) ); - F = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,1) ); - G = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,2) ); - H = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Fp = _mm_macc_ps(vfeps,_mm_macc_ps(H,vfeps,G),F); - FF = _mm_macc_ps(vfeps,_mm_macc_ps(twovfeps,H,G),Fp); - fvdw6 = _mm_mul_ps(c6_00,FF); - - /* CUBIC SPLINE TABLE REPULSION */ - vfitab = _mm_add_epi32(vfitab,ifour); - Y = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,0) ); - F = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,1) ); - G = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,2) ); - H = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Fp = _mm_macc_ps(vfeps,_mm_macc_ps(H,vfeps,G),F); - FF = _mm_macc_ps(vfeps,_mm_macc_ps(twovfeps,H,G),Fp); - fvdw12 = _mm_mul_ps(c12_00,FF); - fvdw = _mm_xor_ps(signbit,_mm_mul_ps(_mm_add_ps(fvdw6,fvdw12),_mm_mul_ps(vftabscale,rinv00))); - - fscal = _mm_add_ps(felec,fvdw); - - /* Update vectorial force */ - fix0 = _mm_macc_ps(dx00,fscal,fix0); - fiy0 = _mm_macc_ps(dy00,fscal,fiy0); - fiz0 = _mm_macc_ps(dz00,fscal,fiz0); - - fjptrA = f+j_coord_offsetA; - fjptrB = f+j_coord_offsetB; - fjptrC = f+j_coord_offsetC; - fjptrD = f+j_coord_offsetD; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD, - _mm_mul_ps(dx00,fscal), - _mm_mul_ps(dy00,fscal), - _mm_mul_ps(dz00,fscal)); - - /* Inner loop uses 85 flops */ - } - - if(jidx=0) ? jnrlistA : 0; - jnrB = (jnrlistB>=0) ? jnrlistB : 0; - jnrC = (jnrlistC>=0) ? jnrlistC : 0; - jnrD = (jnrlistD>=0) ? 
jnrlistD : 0; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = avx128fma_invsqrt_f(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - r00 = _mm_andnot_ps(dummy_mask,r00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A, - vdwparam+vdwioffset0+vdwjidx0B, - vdwparam+vdwioffset0+vdwjidx0C, - vdwparam+vdwioffset0+vdwjidx0D, - &c6_00,&c12_00); - - /* Calculate table index by multiplying r with table scale and truncate to integer */ - rt = _mm_mul_ps(r00,vftabscale); - vfitab = _mm_cvttps_epi32(rt); -#ifdef __XOP__ - vfeps = _mm_frcz_ps(rt); -#else - vfeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); -#endif - twovfeps = _mm_add_ps(vfeps,vfeps); - vfitab = _mm_slli_epi32(vfitab,3); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate 
generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); -#ifdef __XOP__ - gbeps = _mm_frcz_ps(rt); -#else - gbeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); -#endif - gbitab = _mm_slli_epi32(gbitab,2); - - Y = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Fp = _mm_macc_ps(gbeps,_mm_macc_ps(gbeps,H,G),F); - VV = _mm_macc_ps(gbeps,Fp,Y); - vgb = _mm_mul_ps(gbqqfactor,VV); - - twogbeps = _mm_add_ps(gbeps,gbeps); - FF = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r00,vgb)); - dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? 
dvda+jnrD : scratch; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_msub_ps(velec,rinv00,fgb),rinv00); - - /* CUBIC SPLINE TABLE DISPERSION */ - Y = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,0) ); - F = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,1) ); - G = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,2) ); - H = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Fp = _mm_macc_ps(vfeps,_mm_macc_ps(H,vfeps,G),F); - FF = _mm_macc_ps(vfeps,_mm_macc_ps(twovfeps,H,G),Fp); - fvdw6 = _mm_mul_ps(c6_00,FF); - - /* CUBIC SPLINE TABLE REPULSION */ - vfitab = _mm_add_epi32(vfitab,ifour); - Y = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,0) ); - F = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,1) ); - G = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,2) ); - H = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Fp = _mm_macc_ps(vfeps,_mm_macc_ps(H,vfeps,G),F); - FF = _mm_macc_ps(vfeps,_mm_macc_ps(twovfeps,H,G),Fp); - fvdw12 = _mm_mul_ps(c12_00,FF); - fvdw = _mm_xor_ps(signbit,_mm_mul_ps(_mm_add_ps(fvdw6,fvdw12),_mm_mul_ps(vftabscale,rinv00))); - - fscal = _mm_add_ps(felec,fvdw); - - fscal = _mm_andnot_ps(dummy_mask,fscal); - - /* Update vectorial force */ - fix0 = _mm_macc_ps(dx00,fscal,fix0); - fiy0 = _mm_macc_ps(dy00,fscal,fiy0); - fiz0 = _mm_macc_ps(dz00,fscal,fiz0); - - fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch; - fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch; - fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch; - fjptrD = (jnrlistD>=0) ? 
f+j_coord_offsetD : scratch; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD, - _mm_mul_ps(dx00,fscal), - _mm_mul_ps(dy00,fscal), - _mm_mul_ps(dz00,fscal)); - - /* Inner loop uses 86 flops */ - } - - /* End of innermost loop */ - - gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0, - f+i_coord_offset,fshift+i_shift_offset); - - dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0)); - gmx_mm_update_1pot_ps(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 7 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*86); -} diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_128_fma_single.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_128_fma_single.c deleted file mode 100644 index aa4c0b6b0e..0000000000 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_128_fma_single.c +++ /dev/null @@ -1,864 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. 
- * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/* - * Note: this file was generated by the GROMACS avx_128_fma_single kernel generator. 
- */ -#include "gmxpre.h" - -#include "config.h" - -#include - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_x86_avx_128_fma_single.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_128_fma_single - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_128_fma_single - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B,C,D refer to j loop unrolling done with AVX_128, e.g. for the four different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB,jnrC,jnrD; - int jnrlistA,jnrlistB,jnrlistC,jnrlistD; - int j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - real *fjptrA,*fjptrB,*fjptrC,*fjptrD; - real scratch[4*DIM]; - __m128 fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D; - __m128 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,twogbeps,dvdatmp; - __m128 minushalf = _mm_set1_ps(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - __m128 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - __m128 one_sixth = _mm_set1_ps(1.0/6.0); - __m128 one_twelfth = _mm_set1_ps(1.0/12.0); - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128 rt,vfeps,twovfeps,vftabscale,Y,F,G,H,Fp,VV,FF; - real *vftab; - __m128 dummy_mask,cutoff_mask; - __m128 signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) ); - __m128 one = _mm_set1_ps(1.0); - __m128 two = _mm_set1_ps(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_ps(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_ps(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = 
_mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = jnrC = jnrD = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - j_coord_offsetC = 0; - j_coord_offsetD = 0; - - outeriter = 0; - inneriter = 0; - - for(iidx=0;iidx<4*DIM;iidx++) - { - scratch[iidx] = 0.0; - } - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx=0; jidx+=4) - { - - /* Get j neighbor index, and coordinate index */ - jnrA = jjnr[jidx]; - jnrB = jjnr[jidx+1]; - jnrC = jjnr[jidx+2]; - jnrD = jjnr[jidx+3]; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = avx128fma_invsqrt_f(rsq00); - - rinvsq00 = _mm_mul_ps(rinv00,rinv00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A, - vdwparam+vdwioffset0+vdwjidx0B, - vdwparam+vdwioffset0+vdwjidx0C, - vdwparam+vdwioffset0+vdwjidx0D, - &c6_00,&c12_00); - - /* GENERALIZED BORN AND COULOMB 
ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); -#ifdef __XOP__ - gbeps = _mm_frcz_ps(rt); -#else - gbeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); -#endif - gbitab = _mm_slli_epi32(gbitab,2); - - Y = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Fp = _mm_macc_ps(gbeps,_mm_macc_ps(gbeps,H,G),F); - VV = _mm_macc_ps(gbeps,Fp,Y); - vgb = _mm_mul_ps(gbqqfactor,VV); - - twogbeps = _mm_add_ps(gbeps,gbeps); - FF = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r00,vgb)); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_msub_ps(velec,rinv00,fgb),rinv00); - - /* LENNARD-JONES DISPERSION/REPULSION */ - - rinvsix = _mm_mul_ps(_mm_mul_ps(rinvsq00,rinvsq00),rinvsq00); - vvdw6 = _mm_mul_ps(c6_00,rinvsix); - vvdw12 = _mm_mul_ps(c12_00,_mm_mul_ps(rinvsix,rinvsix)); - vvdw = _mm_msub_ps(vvdw12,one_twelfth,_mm_mul_ps(vvdw6,one_sixth)); - fvdw = _mm_mul_ps(_mm_sub_ps(vvdw12,vvdw6),rinvsq00); - - /* Update potential sum for this i atom from the interaction with this j atom. 
*/ - velecsum = _mm_add_ps(velecsum,velec); - vgbsum = _mm_add_ps(vgbsum,vgb); - vvdwsum = _mm_add_ps(vvdwsum,vvdw); - - fscal = _mm_add_ps(felec,fvdw); - - /* Update vectorial force */ - fix0 = _mm_macc_ps(dx00,fscal,fix0); - fiy0 = _mm_macc_ps(dy00,fscal,fiy0); - fiz0 = _mm_macc_ps(dz00,fscal,fiz0); - - fjptrA = f+j_coord_offsetA; - fjptrB = f+j_coord_offsetB; - fjptrC = f+j_coord_offsetC; - fjptrD = f+j_coord_offsetD; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD, - _mm_mul_ps(dx00,fscal), - _mm_mul_ps(dy00,fscal), - _mm_mul_ps(dz00,fscal)); - - /* Inner loop uses 74 flops */ - } - - if(jidx=0) ? jnrlistA : 0; - jnrB = (jnrlistB>=0) ? jnrlistB : 0; - jnrC = (jnrlistC>=0) ? jnrlistC : 0; - jnrD = (jnrlistD>=0) ? jnrlistD : 0; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = avx128fma_invsqrt_f(rsq00); - - rinvsq00 = _mm_mul_ps(rinv00,rinv00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - r00 = _mm_andnot_ps(dummy_mask,r00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = 
_mm_mul_ps(iq0,jq0); - gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A, - vdwparam+vdwioffset0+vdwjidx0B, - vdwparam+vdwioffset0+vdwjidx0C, - vdwparam+vdwioffset0+vdwjidx0D, - &c6_00,&c12_00); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); -#ifdef __XOP__ - gbeps = _mm_frcz_ps(rt); -#else - gbeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); -#endif - gbitab = _mm_slli_epi32(gbitab,2); - - Y = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Fp = _mm_macc_ps(gbeps,_mm_macc_ps(gbeps,H,G),F); - VV = _mm_macc_ps(gbeps,Fp,Y); - vgb = _mm_mul_ps(gbqqfactor,VV); - - twogbeps = _mm_add_ps(gbeps,gbeps); - FF = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r00,vgb)); - dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? 
dvda+jnrD : scratch; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_msub_ps(velec,rinv00,fgb),rinv00); - - /* LENNARD-JONES DISPERSION/REPULSION */ - - rinvsix = _mm_mul_ps(_mm_mul_ps(rinvsq00,rinvsq00),rinvsq00); - vvdw6 = _mm_mul_ps(c6_00,rinvsix); - vvdw12 = _mm_mul_ps(c12_00,_mm_mul_ps(rinvsix,rinvsix)); - vvdw = _mm_msub_ps(vvdw12,one_twelfth,_mm_mul_ps(vvdw6,one_sixth)); - fvdw = _mm_mul_ps(_mm_sub_ps(vvdw12,vvdw6),rinvsq00); - - /* Update potential sum for this i atom from the interaction with this j atom. */ - velec = _mm_andnot_ps(dummy_mask,velec); - velecsum = _mm_add_ps(velecsum,velec); - vgb = _mm_andnot_ps(dummy_mask,vgb); - vgbsum = _mm_add_ps(vgbsum,vgb); - vvdw = _mm_andnot_ps(dummy_mask,vvdw); - vvdwsum = _mm_add_ps(vvdwsum,vvdw); - - fscal = _mm_add_ps(felec,fvdw); - - fscal = _mm_andnot_ps(dummy_mask,fscal); - - /* Update vectorial force */ - fix0 = _mm_macc_ps(dx00,fscal,fix0); - fiy0 = _mm_macc_ps(dy00,fscal,fiy0); - fiz0 = _mm_macc_ps(dz00,fscal,fiz0); - - fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch; - fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch; - fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch; - fjptrD = (jnrlistD>=0) ? 
f+j_coord_offsetD : scratch; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD, - _mm_mul_ps(dx00,fscal), - _mm_mul_ps(dy00,fscal), - _mm_mul_ps(dz00,fscal)); - - /* Inner loop uses 75 flops */ - } - - /* End of innermost loop */ - - gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0, - f+i_coord_offset,fshift+i_shift_offset); - - ggid = gid[iidx]; - /* Update potential energies */ - gmx_mm_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid); - gmx_mm_update_1pot_ps(vgbsum,kernel_data->energygrp_polarization+ggid); - gmx_mm_update_1pot_ps(vvdwsum,kernel_data->energygrp_vdw+ggid); - dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0)); - gmx_mm_update_1pot_ps(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 10 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*75); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_128_fma_single - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_128_fma_single - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B,C,D refer to j loop unrolling done with AVX_128, e.g. for the four different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB,jnrC,jnrD; - int jnrlistA,jnrlistB,jnrlistC,jnrlistD; - int j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - real *fjptrA,*fjptrB,*fjptrC,*fjptrD; - real scratch[4*DIM]; - __m128 fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D; - __m128 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,twogbeps,dvdatmp; - __m128 minushalf = _mm_set1_ps(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - __m128 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - __m128 one_sixth = _mm_set1_ps(1.0/6.0); - __m128 one_twelfth = _mm_set1_ps(1.0/12.0); - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128 rt,vfeps,twovfeps,vftabscale,Y,F,G,H,Fp,VV,FF; - real *vftab; - __m128 dummy_mask,cutoff_mask; - __m128 signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) ); - __m128 one = _mm_set1_ps(1.0); - __m128 two = _mm_set1_ps(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_ps(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_ps(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = 
_mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = jnrC = jnrD = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - j_coord_offsetC = 0; - j_coord_offsetD = 0; - - outeriter = 0; - inneriter = 0; - - for(iidx=0;iidx<4*DIM;iidx++) - { - scratch[iidx] = 0.0; - } - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx=0; jidx+=4) - { - - /* Get j neighbor index, and coordinate index */ - jnrA = jjnr[jidx]; - jnrB = jjnr[jidx+1]; - jnrC = jjnr[jidx+2]; - jnrD = jjnr[jidx+3]; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = avx128fma_invsqrt_f(rsq00); - - rinvsq00 = _mm_mul_ps(rinv00,rinv00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A, - vdwparam+vdwioffset0+vdwjidx0B, - vdwparam+vdwioffset0+vdwjidx0C, - vdwparam+vdwioffset0+vdwjidx0D, - &c6_00,&c12_00); - - /* GENERALIZED BORN AND COULOMB 
ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); -#ifdef __XOP__ - gbeps = _mm_frcz_ps(rt); -#else - gbeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); -#endif - gbitab = _mm_slli_epi32(gbitab,2); - - Y = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Fp = _mm_macc_ps(gbeps,_mm_macc_ps(gbeps,H,G),F); - VV = _mm_macc_ps(gbeps,Fp,Y); - vgb = _mm_mul_ps(gbqqfactor,VV); - - twogbeps = _mm_add_ps(gbeps,gbeps); - FF = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r00,vgb)); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_msub_ps(velec,rinv00,fgb),rinv00); - - /* LENNARD-JONES DISPERSION/REPULSION */ - - rinvsix = _mm_mul_ps(_mm_mul_ps(rinvsq00,rinvsq00),rinvsq00); - fvdw = _mm_mul_ps(_mm_msub_ps(c12_00,rinvsix,c6_00),_mm_mul_ps(rinvsix,rinvsq00)); - - fscal = _mm_add_ps(felec,fvdw); - - /* Update vectorial force */ - fix0 = _mm_macc_ps(dx00,fscal,fix0); - fiy0 = _mm_macc_ps(dy00,fscal,fiy0); - fiz0 = _mm_macc_ps(dz00,fscal,fiz0); - - fjptrA = f+j_coord_offsetA; - fjptrB = f+j_coord_offsetB; - fjptrC = f+j_coord_offsetC; - 
fjptrD = f+j_coord_offsetD; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD, - _mm_mul_ps(dx00,fscal), - _mm_mul_ps(dy00,fscal), - _mm_mul_ps(dz00,fscal)); - - /* Inner loop uses 67 flops */ - } - - if(jidx=0) ? jnrlistA : 0; - jnrB = (jnrlistB>=0) ? jnrlistB : 0; - jnrC = (jnrlistC>=0) ? jnrlistC : 0; - jnrD = (jnrlistD>=0) ? jnrlistD : 0; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = avx128fma_invsqrt_f(rsq00); - - rinvsq00 = _mm_mul_ps(rinv00,rinv00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - r00 = _mm_andnot_ps(dummy_mask,r00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A, - vdwparam+vdwioffset0+vdwjidx0B, - vdwparam+vdwioffset0+vdwjidx0C, - vdwparam+vdwioffset0+vdwjidx0D, - &c6_00,&c12_00); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = 
_mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); -#ifdef __XOP__ - gbeps = _mm_frcz_ps(rt); -#else - gbeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); -#endif - gbitab = _mm_slli_epi32(gbitab,2); - - Y = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Fp = _mm_macc_ps(gbeps,_mm_macc_ps(gbeps,H,G),F); - VV = _mm_macc_ps(gbeps,Fp,Y); - vgb = _mm_mul_ps(gbqqfactor,VV); - - twogbeps = _mm_add_ps(gbeps,gbeps); - FF = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r00,vgb)); - dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? 
dvda+jnrD : scratch; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_msub_ps(velec,rinv00,fgb),rinv00); - - /* LENNARD-JONES DISPERSION/REPULSION */ - - rinvsix = _mm_mul_ps(_mm_mul_ps(rinvsq00,rinvsq00),rinvsq00); - fvdw = _mm_mul_ps(_mm_msub_ps(c12_00,rinvsix,c6_00),_mm_mul_ps(rinvsix,rinvsq00)); - - fscal = _mm_add_ps(felec,fvdw); - - fscal = _mm_andnot_ps(dummy_mask,fscal); - - /* Update vectorial force */ - fix0 = _mm_macc_ps(dx00,fscal,fix0); - fiy0 = _mm_macc_ps(dy00,fscal,fiy0); - fiz0 = _mm_macc_ps(dz00,fscal,fiz0); - - fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch; - fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch; - fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch; - fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD, - _mm_mul_ps(dx00,fscal), - _mm_mul_ps(dy00,fscal), - _mm_mul_ps(dz00,fscal)); - - /* Inner loop uses 68 flops */ - } - - /* End of innermost loop */ - - gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0, - f+i_coord_offset,fshift+i_shift_offset); - - dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0)); - gmx_mm_update_1pot_ps(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 7 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*68); -} diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_128_fma_single.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_128_fma_single.c deleted file mode 100644 index 737a4e83fa..0000000000 --- 
a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_128_fma_single.c +++ /dev/null @@ -1,769 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/* - * Note: this file was generated by the GROMACS avx_128_fma_single kernel generator. 
- */ -#include "gmxpre.h" - -#include "config.h" - -#include - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_x86_avx_128_fma_single.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_128_fma_single - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_128_fma_single - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B,C,D refer to j loop unrolling done with AVX_128, e.g. for the four different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB,jnrC,jnrD; - int jnrlistA,jnrlistB,jnrlistC,jnrlistD; - int j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - real *fjptrA,*fjptrB,*fjptrC,*fjptrD; - real scratch[4*DIM]; - __m128 fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D; - __m128 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,twogbeps,dvdatmp; - __m128 minushalf = _mm_set1_ps(-0.5); - real *invsqrta,*dvda,*gbtab; - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128 rt,vfeps,twovfeps,vftabscale,Y,F,G,H,Fp,VV,FF; - real *vftab; - __m128 dummy_mask,cutoff_mask; - __m128 signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) ); - __m128 one = _mm_set1_ps(1.0); - __m128 two = _mm_set1_ps(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_ps(fr->ic->epsfac); - charge = mdatoms->chargeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_ps(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = jnrC = jnrD = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - j_coord_offsetC = 0; - j_coord_offsetD = 0; - - outeriter = 0; - inneriter = 0; - - for(iidx=0;iidx<4*DIM;iidx++) - { - scratch[iidx] = 
0.0; - } - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx=0; jidx+=4) - { - - /* Get j neighbor index, and coordinate index */ - jnrA = jjnr[jidx]; - jnrB = jjnr[jidx+1]; - jnrC = jjnr[jidx+2]; - jnrD = jjnr[jidx+3]; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = avx128fma_invsqrt_f(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. 
- */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); -#ifdef __XOP__ - gbeps = _mm_frcz_ps(rt); -#else - gbeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); -#endif - gbitab = _mm_slli_epi32(gbitab,2); - - Y = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Fp = _mm_macc_ps(gbeps,_mm_macc_ps(gbeps,H,G),F); - VV = _mm_macc_ps(gbeps,Fp,Y); - vgb = _mm_mul_ps(gbqqfactor,VV); - - twogbeps = _mm_add_ps(gbeps,gbeps); - FF = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r00,vgb)); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_msub_ps(velec,rinv00,fgb),rinv00); - - /* Update potential sum for this i atom from the interaction with this j atom. */ - velecsum = _mm_add_ps(velecsum,velec); - vgbsum = _mm_add_ps(vgbsum,vgb); - - fscal = felec; - - /* Update vectorial force */ - fix0 = _mm_macc_ps(dx00,fscal,fix0); - fiy0 = _mm_macc_ps(dy00,fscal,fiy0); - fiz0 = _mm_macc_ps(dz00,fscal,fiz0); - - fjptrA = f+j_coord_offsetA; - fjptrB = f+j_coord_offsetB; - fjptrC = f+j_coord_offsetC; - fjptrD = f+j_coord_offsetD; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD, - _mm_mul_ps(dx00,fscal), - _mm_mul_ps(dy00,fscal), - _mm_mul_ps(dz00,fscal)); - - /* Inner loop uses 61 flops */ - } - - if(jidx=0) ? jnrlistA : 0; - jnrB = (jnrlistB>=0) ? jnrlistB : 0; - jnrC = (jnrlistC>=0) ? jnrlistC : 0; - jnrD = (jnrlistD>=0) ? 
jnrlistD : 0; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = avx128fma_invsqrt_f(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - r00 = _mm_andnot_ps(dummy_mask,r00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. 
- */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); -#ifdef __XOP__ - gbeps = _mm_frcz_ps(rt); -#else - gbeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); -#endif - gbitab = _mm_slli_epi32(gbitab,2); - - Y = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Fp = _mm_macc_ps(gbeps,_mm_macc_ps(gbeps,H,G),F); - VV = _mm_macc_ps(gbeps,Fp,Y); - vgb = _mm_mul_ps(gbqqfactor,VV); - - twogbeps = _mm_add_ps(gbeps,gbeps); - FF = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r00,vgb)); - dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? dvda+jnrD : scratch; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_msub_ps(velec,rinv00,fgb),rinv00); - - /* Update potential sum for this i atom from the interaction with this j atom. */ - velec = _mm_andnot_ps(dummy_mask,velec); - velecsum = _mm_add_ps(velecsum,velec); - vgb = _mm_andnot_ps(dummy_mask,vgb); - vgbsum = _mm_add_ps(vgbsum,vgb); - - fscal = felec; - - fscal = _mm_andnot_ps(dummy_mask,fscal); - - /* Update vectorial force */ - fix0 = _mm_macc_ps(dx00,fscal,fix0); - fiy0 = _mm_macc_ps(dy00,fscal,fiy0); - fiz0 = _mm_macc_ps(dz00,fscal,fiz0); - - fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch; - fjptrB = (jnrlistB>=0) ? 
f+j_coord_offsetB : scratch; - fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch; - fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD, - _mm_mul_ps(dx00,fscal), - _mm_mul_ps(dy00,fscal), - _mm_mul_ps(dz00,fscal)); - - /* Inner loop uses 62 flops */ - } - - /* End of innermost loop */ - - gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0, - f+i_coord_offset,fshift+i_shift_offset); - - ggid = gid[iidx]; - /* Update potential energies */ - gmx_mm_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid); - gmx_mm_update_1pot_ps(vgbsum,kernel_data->energygrp_polarization+ggid); - dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0)); - gmx_mm_update_1pot_ps(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 9 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*9 + inneriter*62); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_128_fma_single - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_128_fma_single - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B,C,D refer to j loop unrolling done with AVX_128, e.g. for the four different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB,jnrC,jnrD; - int jnrlistA,jnrlistB,jnrlistC,jnrlistD; - int j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - real *fjptrA,*fjptrB,*fjptrC,*fjptrD; - real scratch[4*DIM]; - __m128 fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D; - __m128 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,twogbeps,dvdatmp; - __m128 minushalf = _mm_set1_ps(-0.5); - real *invsqrta,*dvda,*gbtab; - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128 rt,vfeps,twovfeps,vftabscale,Y,F,G,H,Fp,VV,FF; - real *vftab; - __m128 dummy_mask,cutoff_mask; - __m128 signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) ); - __m128 one = _mm_set1_ps(1.0); - __m128 two = _mm_set1_ps(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_ps(fr->ic->epsfac); - charge = mdatoms->chargeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_ps(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = jnrC = jnrD = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - j_coord_offsetC = 0; - j_coord_offsetD = 0; - - outeriter = 0; - inneriter = 0; - - for(iidx=0;iidx<4*DIM;iidx++) - { - scratch[iidx] = 
0.0; - } - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx=0; jidx+=4) - { - - /* Get j neighbor index, and coordinate index */ - jnrA = jjnr[jidx]; - jnrB = jjnr[jidx+1]; - jnrC = jjnr[jidx+2]; - jnrD = jjnr[jidx+3]; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = avx128fma_invsqrt_f(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. 
- */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); -#ifdef __XOP__ - gbeps = _mm_frcz_ps(rt); -#else - gbeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); -#endif - gbitab = _mm_slli_epi32(gbitab,2); - - Y = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Fp = _mm_macc_ps(gbeps,_mm_macc_ps(gbeps,H,G),F); - VV = _mm_macc_ps(gbeps,Fp,Y); - vgb = _mm_mul_ps(gbqqfactor,VV); - - twogbeps = _mm_add_ps(gbeps,gbeps); - FF = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r00,vgb)); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_msub_ps(velec,rinv00,fgb),rinv00); - - fscal = felec; - - /* Update vectorial force */ - fix0 = _mm_macc_ps(dx00,fscal,fix0); - fiy0 = _mm_macc_ps(dy00,fscal,fiy0); - fiz0 = _mm_macc_ps(dz00,fscal,fiz0); - - fjptrA = f+j_coord_offsetA; - fjptrB = f+j_coord_offsetB; - fjptrC = f+j_coord_offsetC; - fjptrD = f+j_coord_offsetD; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD, - _mm_mul_ps(dx00,fscal), - _mm_mul_ps(dy00,fscal), - _mm_mul_ps(dz00,fscal)); - - /* Inner loop uses 59 flops */ - } - - if(jidx=0) ? jnrlistA : 0; - jnrB = (jnrlistB>=0) ? jnrlistB : 0; - jnrC = (jnrlistC>=0) ? jnrlistC : 0; - jnrD = (jnrlistD>=0) ? 
jnrlistD : 0; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = avx128fma_invsqrt_f(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - r00 = _mm_andnot_ps(dummy_mask,r00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. 
- */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); -#ifdef __XOP__ - gbeps = _mm_frcz_ps(rt); -#else - gbeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); -#endif - gbitab = _mm_slli_epi32(gbitab,2); - - Y = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Fp = _mm_macc_ps(gbeps,_mm_macc_ps(gbeps,H,G),F); - VV = _mm_macc_ps(gbeps,Fp,Y); - vgb = _mm_mul_ps(gbqqfactor,VV); - - twogbeps = _mm_add_ps(gbeps,gbeps); - FF = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r00,vgb)); - dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? dvda+jnrD : scratch; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_msub_ps(velec,rinv00,fgb),rinv00); - - fscal = felec; - - fscal = _mm_andnot_ps(dummy_mask,fscal); - - /* Update vectorial force */ - fix0 = _mm_macc_ps(dx00,fscal,fix0); - fiy0 = _mm_macc_ps(dy00,fscal,fiy0); - fiz0 = _mm_macc_ps(dz00,fscal,fiz0); - - fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch; - fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch; - fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch; - fjptrD = (jnrlistD>=0) ? 
f+j_coord_offsetD : scratch; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD, - _mm_mul_ps(dx00,fscal), - _mm_mul_ps(dy00,fscal), - _mm_mul_ps(dz00,fscal)); - - /* Inner loop uses 60 flops */ - } - - /* End of innermost loop */ - - gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0, - f+i_coord_offset,fshift+i_shift_offset); - - dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0)); - gmx_mm_update_1pot_ps(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 7 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*7 + inneriter*60); -} diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.c index c87976462a..6489837de0 100644 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.c +++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.c @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -51,6 +51,36 @@ nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_avx_128_fma_single; nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_128_fma_single; nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_128_fma_single; nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_128_fma_single; +nb_kernel_t 
nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_128_fma_single; nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_avx_128_fma_single; nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_avx_128_fma_single; nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_avx_128_fma_single; @@ -141,72 +171,6 @@ nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_avx_128_fma_single; nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_128_fma_single; nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_128_fma_single; nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_128_fma_single; 
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_128_fma_single; -nb_kernel_t 
nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_128_fma_single; -nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_128_fma_single; nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_128_fma_single; nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_128_fma_single; nb_kernel_t 
nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_128_fma_single; @@ -277,6 +241,36 @@ nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_128_fma_single; nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_128_fma_single; nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_128_fma_single; nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_128_fma_single; +nb_kernel_t 
nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_128_fma_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_128_fma_single; nb_kernel_info_t @@ -294,6 +288,36 @@ nb_kernel_info_t { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" }, { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" }, + { 
nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" }, + { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_128_fma_single", 
"avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_128_fma_single, 
"nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_128_fma_single, 
"nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }, { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "PotentialAndForce" }, { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "Force" }, { nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "LJEwald", "None", "Water3Particle", "", "PotentialAndForce" }, @@ -384,72 +408,6 @@ nb_kernel_info_t { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "Force" }, { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "PotentialAndForce" }, { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", 
"PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", 
"Water4Water4", "", "Force" }, - { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water4Water4", "", 
"PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_128_fma_single", 
"avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" }, - { 
nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", 
"CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" }, - { 
nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_128_fma_single, 
"nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" }, { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" }, { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" }, @@ -519,7 +477,37 @@ nb_kernel_info_t { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_128_fma_single, 
"nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" }, { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" }, { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" } + { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }, + { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" }, + { 
nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" }, + { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" }, + { 
nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, + { 
nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" } }; int diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_template_avx_128_fma_single.pre 
b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_template_avx_128_fma_single.pre index fc3469af5e..29d3517924 100644 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_template_avx_128_fma_single.pre +++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_template_avx_128_fma_single.pre @@ -2,7 +2,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -131,12 +131,6 @@ void __m128 velec,felec,velecsum,facel,crf,krf,krf2; real *charge; /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC */ - __m128i gbitab; - __m128 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,twogbeps,dvdatmp; - __m128 minushalf = _mm_set1_ps(-0.5); - real *invsqrta,*dvda,*gbtab; - /* #endif */ /* #if KERNEL_VDW != 'None' */ int nvdwtype; __m128 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; @@ -145,7 +139,7 @@ void __m128 one_sixth = _mm_set1_ps(1.0/6.0); __m128 one_twelfth = _mm_set1_ps(1.0/12.0); /* #endif */ - /* #if 'Table' in KERNEL_ELEC or 'GeneralizedBorn' in KERNEL_ELEC or 'Table' in KERNEL_VDW */ + /* #if 'Table' in KERNEL_ELEC or 'Table' in KERNEL_VDW */ __m128i vfitab; __m128i ifour = _mm_set1_epi32(4); __m128 rt,vfeps,twovfeps,vftabscale,Y,F,G,H,Fp,VV,FF; @@ -233,14 +227,6 @@ void /* #endif */ /* #endif */ - /* #if KERNEL_ELEC=='GeneralizedBorn' */ - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_ps(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - /* #endif */ - /* #if 'Water' 
in GEOMETRY_I */ /* Setup water-specific parameters */ inr = nlist->iinr[0]; @@ -374,9 +360,6 @@ void /* #for I in PARTICLES_ELEC_I */ iq{I} = _mm_mul_ps(facel,_mm_load1_ps(charge+inr+{I})); /* #define OUTERFLOPS OUTERFLOPS+1 */ - /* #if KERNEL_ELEC=='GeneralizedBorn' */ - isai{I} = _mm_load1_ps(invsqrta+inr+{I}); - /* #endif */ /* #endfor */ /* #for I in PARTICLES_VDW_I */ vdwioffset{I} = 2*nvdwtype*vdwtype[inr+{I}]; @@ -388,16 +371,10 @@ void /* #if KERNEL_ELEC != 'None' */ velecsum = _mm_setzero_ps(); /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC */ - vgbsum = _mm_setzero_ps(); - /* #endif */ /* #if KERNEL_VDW != 'None' */ vvdwsum = _mm_setzero_ps(); /* #endif */ /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */ - dvdasum = _mm_setzero_ps(); - /* #endif */ /* #for ROUND in ['Loop','Epilogue'] */ @@ -502,10 +479,6 @@ void /* #for J in PARTICLES_ELEC_J */ jq{J} = gmx_mm_load_4real_swizzle_ps(charge+jnrA+{J},charge+jnrB+{J}, charge+jnrC+{J},charge+jnrD+{J}); - /* #if KERNEL_ELEC=='GeneralizedBorn' */ - isaj{J} = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+{J},invsqrta+jnrB+{J}, - invsqrta+jnrC+{J},invsqrta+jnrD+{J}); - /* #endif */ /* #endfor */ /* #for J in PARTICLES_VDW_J */ vdwjidx{J}A = 2*vdwtype[jnrA+{J}]; @@ -621,67 +594,6 @@ void /* #define INNERFLOPS INNERFLOPS+3 */ /* #endif */ - /* #elif KERNEL_ELEC=='GeneralizedBorn' */ - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai{I},isaj{J}); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq{I}{J},_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - /* #define INNERFLOPS INNERFLOPS+5 */ - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. 
- */ - rt = _mm_mul_ps(r{I}{J},gbscale); - gbitab = _mm_cvttps_epi32(rt); -#ifdef __XOP__ - gbeps = _mm_frcz_ps(rt); -#else - gbeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); -#endif - gbitab = _mm_slli_epi32(gbitab,2); - - Y = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Fp = _mm_macc_ps(gbeps,_mm_macc_ps(gbeps,H,G),F); - VV = _mm_macc_ps(gbeps,Fp,Y); - vgb = _mm_mul_ps(gbqqfactor,VV); - /* #define INNERFLOPS INNERFLOPS+10 */ - - /* #if 'Force' in KERNEL_VF */ - twogbeps = _mm_add_ps(gbeps,gbeps); - FF = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r{I}{J},vgb)); - /* #if ROUND == 'Epilogue' */ - dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp); - /* #endif */ - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - /* #if ROUND == 'Loop' */ - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - /* #else */ - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? 
dvda+jnrD : scratch; - /* #endif */ - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj{J},isaj{J}))); - /* #define INNERFLOPS INNERFLOPS+13 */ - /* #endif */ - velec = _mm_mul_ps(qq{I}{J},rinv{I}{J}); - /* #define INNERFLOPS INNERFLOPS+1 */ - /* #if 'Force' in KERNEL_VF */ - felec = _mm_mul_ps(_mm_msub_ps(velec,rinv{I}{J},fgb),rinv{I}{J}); - /* #define INNERFLOPS INNERFLOPS+3 */ - /* #endif */ - /* #elif KERNEL_ELEC=='Ewald' */ /* EWALD ELECTROSTATICS */ @@ -907,17 +819,6 @@ void /* #endif */ velecsum = _mm_add_ps(velecsum,velec); /* #define INNERFLOPS INNERFLOPS+1 */ - /* #if KERNEL_ELEC=='GeneralizedBorn' */ - /* #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */ - vgb = _mm_and_ps(vgb,cutoff_mask); - /* #define INNERFLOPS INNERFLOPS+1 */ - /* #endif */ - /* #if ROUND == 'Epilogue' */ - vgb = _mm_andnot_ps(dummy_mask,vgb); - /* #endif */ - vgbsum = _mm_add_ps(vgbsum,vgb); - /* #define INNERFLOPS INNERFLOPS+1 */ - /* #endif */ /* #endif */ /* #if 'vdw' in INTERACTION_FLAGS[I][J] */ /* ## Note special check for TIP4P-TIP4P. 
Since we are cutting of all hydrogen interactions we also cut the LJ-only O-O interaction */ @@ -1070,19 +971,11 @@ void gmx_mm_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid); /* #define OUTERFLOPS OUTERFLOPS+1 */ /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC */ - gmx_mm_update_1pot_ps(vgbsum,kernel_data->energygrp_polarization+ggid); - /* #define OUTERFLOPS OUTERFLOPS+1 */ - /* #endif */ /* #if KERNEL_VDW != 'None' */ gmx_mm_update_1pot_ps(vvdwsum,kernel_data->energygrp_vdw+ggid); /* #define OUTERFLOPS OUTERFLOPS+1 */ /* #endif */ /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */ - dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai{I},isai{I})); - gmx_mm_update_1pot_ps(dvdasum,dvda+inr); - /* #endif */ /* Increment number of inner iterations */ inneriter += j_index_end - j_index_start; diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/make_nb_kernel_avx_256_double.py b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/make_nb_kernel_avx_256_double.py index d4cfc54224..3240b74352 100755 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/make_nb_kernel_avx_256_double.py +++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/make_nb_kernel_avx_256_double.py @@ -2,7 +2,7 @@ # # This file is part of the GROMACS molecular simulation package. # -# Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by +# Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, # and including many others, as listed in the AUTHORS file in the # top-level source directory and at http://www.gromacs.org. 
@@ -105,7 +105,6 @@ ElectrostaticsList = { 'None' : [], 'Coulomb' : ['rinv','rinvsq'], 'ReactionField' : ['rinv','rinvsq'], - 'GeneralizedBorn' : ['rinv','r'], 'CubicSplineTable' : ['rinv','r','table'], 'Ewald' : ['rinv','rinvsq','r'], } @@ -190,7 +189,6 @@ Abbreviation = { 'Coulomb' : 'Coul', 'Ewald' : 'Ew', 'ReactionField' : 'RF', - 'GeneralizedBorn' : 'GB', 'CubicSplineTable' : 'CSTab', 'LennardJones' : 'LJ', 'Buckingham' : 'Bham', @@ -269,7 +267,7 @@ def KeepKernel(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,Kernel return 0 # No need for LJ-only water optimization, or water optimization with implicit solvent. - if('Water' in KernelGeom[0] and (KernelElec=='None' or 'GeneralizedBorn' in KernelElec)): + if('Water' in KernelGeom[0] and KernelElec=='None'): return 0 # Non-matching table settings are pointless diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_256_double.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_256_double.c deleted file mode 100644 index 8d3df8b683..0000000000 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_256_double.c +++ /dev/null @@ -1,972 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. 
- * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/* - * Note: this file was generated by the GROMACS avx_256_double kernel generator. 
- */ -#include "gmxpre.h" - -#include "config.h" - -#include - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_x86_avx_256_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_256_double - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: CubicSplineTable - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_256_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B,C,D refer to j loop unrolling done with AVX, e.g. for the four different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB,jnrC,jnrD; - int jnrlistA,jnrlistB,jnrlistC,jnrlistD; - int jnrlistE,jnrlistF,jnrlistG,jnrlistH; - int j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - real *fjptrA,*fjptrB,*fjptrC,*fjptrD; - real scratch[4*DIM]; - __m256d tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - real * vdwioffsetptr0; - __m256d ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D; - __m256d jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m256d dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m256d velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m256d vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp; - __m256d minushalf = _mm256_set1_pd(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - __m256d rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - __m256d one_sixth = _mm256_set1_pd(1.0/6.0); - __m256d one_twelfth = _mm256_set1_pd(1.0/12.0); - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m256d rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m256d dummy_mask,cutoff_mask; - __m128 tmpmask0,tmpmask1; - __m256d signbit = _mm256_castsi256_pd( _mm256_set1_epi32(0x80000000) ); - __m256d one = _mm256_set1_pd(1.0); - __m256d two = _mm256_set1_pd(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm256_set1_pd(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = 
_mm256_set1_pd(kernel_data->table_vdw->scale); - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm256_set1_pd(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm256_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = jnrC = jnrD = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - j_coord_offsetC = 0; - j_coord_offsetD = 0; - - outeriter = 0; - inneriter = 0; - - for(iidx=0;iidx<4*DIM;iidx++) - { - scratch[iidx] = 0.0; - } - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx=0; jidx+=4) - { - - /* Get j neighbor index, and coordinate index */ - jnrA = jjnr[jidx]; - jnrB = jjnr[jidx+1]; - jnrC = jjnr[jidx+2]; - jnrD = jjnr[jidx+3]; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm256_load_1rvec_4ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm256_sub_pd(ix0,jx0); - dy00 = _mm256_sub_pd(iy0,jy0); - dz00 = _mm256_sub_pd(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm256_calc_rsq_pd(dx00,dy00,dz00); - - rinv00 = avx256_invsqrt_d(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm256_load_4real_swizzle_pd(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm256_load_4real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm256_mul_pd(rsq00,rinv00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm256_mul_pd(iq0,jq0); - 
gmx_mm256_load_4pair_swizzle_pd(vdwioffsetptr0+vdwjidx0A, - vdwioffsetptr0+vdwjidx0B, - vdwioffsetptr0+vdwjidx0C, - vdwioffsetptr0+vdwjidx0D, - &c6_00,&c12_00); - - /* Calculate table index by multiplying r with table scale and truncate to integer */ - rt = _mm256_mul_pd(r00,vftabscale); - vfitab = _mm256_cvttpd_epi32(rt); - vfeps = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR)); - vfitab = _mm_slli_epi32(vfitab,3); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm256_mul_pd(isai0,isaj0); - gbqqfactor = _mm256_xor_pd(signbit,_mm256_mul_pd(qq00,_mm256_mul_pd(isaprod,gbinvepsdiff))); - gbscale = _mm256_mul_pd(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm256_mul_pd(r00,gbscale); - gbitab = _mm256_cvttpd_epi32(rt); - gbeps = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR)); - gbitab = _mm_slli_epi32(gbitab,2); - Y = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,0) ); - F = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,1) ); - G = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,2) ); - H = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,3) ); - GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H); - Heps = _mm256_mul_pd(gbeps,H); - Fp = _mm256_add_pd(F,_mm256_mul_pd(gbeps,_mm256_add_pd(G,Heps))); - VV = _mm256_add_pd(Y,_mm256_mul_pd(gbeps,Fp)); - vgb = _mm256_mul_pd(gbqqfactor,VV); - - FF = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps)))); - fgb = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale)); - dvdatmp = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r00))); - dvdasum = _mm256_add_pd(dvdasum,dvdatmp); - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - gmx_mm256_increment_4real_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD, - _mm256_mul_pd(dvdatmp,_mm256_mul_pd(isaj0,isaj0))); - velec = 
_mm256_mul_pd(qq00,rinv00); - felec = _mm256_mul_pd(_mm256_sub_pd(_mm256_mul_pd(velec,rinv00),fgb),rinv00); - - /* CUBIC SPLINE TABLE DISPERSION */ - Y = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,0) ); - F = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,1) ); - G = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,2) ); - H = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,3) ); - GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H); - Heps = _mm256_mul_pd(vfeps,H); - Fp = _mm256_add_pd(F,_mm256_mul_pd(vfeps,_mm256_add_pd(G,Heps))); - VV = _mm256_add_pd(Y,_mm256_mul_pd(vfeps,Fp)); - vvdw6 = _mm256_mul_pd(c6_00,VV); - FF = _mm256_add_pd(Fp,_mm256_mul_pd(vfeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps)))); - fvdw6 = _mm256_mul_pd(c6_00,FF); - - /* CUBIC SPLINE TABLE REPULSION */ - vfitab = _mm_add_epi32(vfitab,ifour); - Y = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,0) ); - F = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,1) ); - G = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,2) ); - H = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,3) ); - GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H); - Heps = _mm256_mul_pd(vfeps,H); - Fp = _mm256_add_pd(F,_mm256_mul_pd(vfeps,_mm256_add_pd(G,Heps))); - VV = _mm256_add_pd(Y,_mm256_mul_pd(vfeps,Fp)); - vvdw12 = _mm256_mul_pd(c12_00,VV); - FF = _mm256_add_pd(Fp,_mm256_mul_pd(vfeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps)))); - fvdw12 = _mm256_mul_pd(c12_00,FF); - vvdw = _mm256_add_pd(vvdw12,vvdw6); - fvdw = _mm256_xor_pd(signbit,_mm256_mul_pd(_mm256_add_pd(fvdw6,fvdw12),_mm256_mul_pd(vftabscale,rinv00))); - - /* Update potential sum for this i atom from the interaction with this j atom. 
*/ - velecsum = _mm256_add_pd(velecsum,velec); - vgbsum = _mm256_add_pd(vgbsum,vgb); - vvdwsum = _mm256_add_pd(vvdwsum,vvdw); - - fscal = _mm256_add_pd(felec,fvdw); - - /* Calculate temporary vectorial force */ - tx = _mm256_mul_pd(fscal,dx00); - ty = _mm256_mul_pd(fscal,dy00); - tz = _mm256_mul_pd(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm256_add_pd(fix0,tx); - fiy0 = _mm256_add_pd(fiy0,ty); - fiz0 = _mm256_add_pd(fiz0,tz); - - fjptrA = f+j_coord_offsetA; - fjptrB = f+j_coord_offsetB; - fjptrC = f+j_coord_offsetC; - fjptrD = f+j_coord_offsetD; - gmx_mm256_decrement_1rvec_4ptr_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 91 flops */ - } - - if(jidx=0) ? jnrlistA : 0; - jnrB = (jnrlistB>=0) ? jnrlistB : 0; - jnrC = (jnrlistC>=0) ? jnrlistC : 0; - jnrD = (jnrlistD>=0) ? jnrlistD : 0; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm256_load_1rvec_4ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm256_sub_pd(ix0,jx0); - dy00 = _mm256_sub_pd(iy0,jy0); - dz00 = _mm256_sub_pd(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm256_calc_rsq_pd(dx00,dy00,dz00); - - rinv00 = avx256_invsqrt_d(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm256_load_4real_swizzle_pd(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm256_load_4real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm256_mul_pd(rsq00,rinv00); - r00 = _mm256_andnot_pd(dummy_mask,r00); - - /* Compute parameters for 
interactions between i and j atoms */ - qq00 = _mm256_mul_pd(iq0,jq0); - gmx_mm256_load_4pair_swizzle_pd(vdwioffsetptr0+vdwjidx0A, - vdwioffsetptr0+vdwjidx0B, - vdwioffsetptr0+vdwjidx0C, - vdwioffsetptr0+vdwjidx0D, - &c6_00,&c12_00); - - /* Calculate table index by multiplying r with table scale and truncate to integer */ - rt = _mm256_mul_pd(r00,vftabscale); - vfitab = _mm256_cvttpd_epi32(rt); - vfeps = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR)); - vfitab = _mm_slli_epi32(vfitab,3); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm256_mul_pd(isai0,isaj0); - gbqqfactor = _mm256_xor_pd(signbit,_mm256_mul_pd(qq00,_mm256_mul_pd(isaprod,gbinvepsdiff))); - gbscale = _mm256_mul_pd(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm256_mul_pd(r00,gbscale); - gbitab = _mm256_cvttpd_epi32(rt); - gbeps = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR)); - gbitab = _mm_slli_epi32(gbitab,2); - Y = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,0) ); - F = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,1) ); - G = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,2) ); - H = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,3) ); - GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H); - Heps = _mm256_mul_pd(gbeps,H); - Fp = _mm256_add_pd(F,_mm256_mul_pd(gbeps,_mm256_add_pd(G,Heps))); - VV = _mm256_add_pd(Y,_mm256_mul_pd(gbeps,Fp)); - vgb = _mm256_mul_pd(gbqqfactor,VV); - - FF = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps)))); - fgb = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale)); - dvdatmp = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r00))); - dvdatmp = _mm256_andnot_pd(dummy_mask,dvdatmp); - dvdasum = _mm256_add_pd(dvdasum,dvdatmp); - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict 
seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? dvda+jnrD : scratch; - gmx_mm256_increment_4real_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD, - _mm256_mul_pd(dvdatmp,_mm256_mul_pd(isaj0,isaj0))); - velec = _mm256_mul_pd(qq00,rinv00); - felec = _mm256_mul_pd(_mm256_sub_pd(_mm256_mul_pd(velec,rinv00),fgb),rinv00); - - /* CUBIC SPLINE TABLE DISPERSION */ - Y = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,0) ); - F = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,1) ); - G = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,2) ); - H = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,3) ); - GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H); - Heps = _mm256_mul_pd(vfeps,H); - Fp = _mm256_add_pd(F,_mm256_mul_pd(vfeps,_mm256_add_pd(G,Heps))); - VV = _mm256_add_pd(Y,_mm256_mul_pd(vfeps,Fp)); - vvdw6 = _mm256_mul_pd(c6_00,VV); - FF = _mm256_add_pd(Fp,_mm256_mul_pd(vfeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps)))); - fvdw6 = _mm256_mul_pd(c6_00,FF); - - /* CUBIC SPLINE TABLE REPULSION */ - vfitab = _mm_add_epi32(vfitab,ifour); - Y = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,0) ); - F = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,1) ); - G = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,2) ); - H = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,3) ); - GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H); - Heps = _mm256_mul_pd(vfeps,H); - Fp = _mm256_add_pd(F,_mm256_mul_pd(vfeps,_mm256_add_pd(G,Heps))); - VV = _mm256_add_pd(Y,_mm256_mul_pd(vfeps,Fp)); - vvdw12 = _mm256_mul_pd(c12_00,VV); - FF = _mm256_add_pd(Fp,_mm256_mul_pd(vfeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps)))); - fvdw12 = _mm256_mul_pd(c12_00,FF); - vvdw = _mm256_add_pd(vvdw12,vvdw6); - fvdw = _mm256_xor_pd(signbit,_mm256_mul_pd(_mm256_add_pd(fvdw6,fvdw12),_mm256_mul_pd(vftabscale,rinv00))); - - /* Update potential sum for this i atom from the 
interaction with this j atom. */ - velec = _mm256_andnot_pd(dummy_mask,velec); - velecsum = _mm256_add_pd(velecsum,velec); - vgb = _mm256_andnot_pd(dummy_mask,vgb); - vgbsum = _mm256_add_pd(vgbsum,vgb); - vvdw = _mm256_andnot_pd(dummy_mask,vvdw); - vvdwsum = _mm256_add_pd(vvdwsum,vvdw); - - fscal = _mm256_add_pd(felec,fvdw); - - fscal = _mm256_andnot_pd(dummy_mask,fscal); - - /* Calculate temporary vectorial force */ - tx = _mm256_mul_pd(fscal,dx00); - ty = _mm256_mul_pd(fscal,dy00); - tz = _mm256_mul_pd(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm256_add_pd(fix0,tx); - fiy0 = _mm256_add_pd(fiy0,ty); - fiz0 = _mm256_add_pd(fiz0,tz); - - fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch; - fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch; - fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch; - fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch; - gmx_mm256_decrement_1rvec_4ptr_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 92 flops */ - } - - /* End of innermost loop */ - - gmx_mm256_update_iforce_1atom_swizzle_pd(fix0,fiy0,fiz0, - f+i_coord_offset,fshift+i_shift_offset); - - ggid = gid[iidx]; - /* Update potential energies */ - gmx_mm256_update_1pot_pd(velecsum,kernel_data->energygrp_elec+ggid); - gmx_mm256_update_1pot_pd(vgbsum,kernel_data->energygrp_polarization+ggid); - gmx_mm256_update_1pot_pd(vvdwsum,kernel_data->energygrp_vdw+ggid); - dvdasum = _mm256_mul_pd(dvdasum, _mm256_mul_pd(isai0,isai0)); - gmx_mm256_update_1pot_pd(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 10 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*92); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_256_double - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: 
CubicSplineTable - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_256_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B,C,D refer to j loop unrolling done with AVX, e.g. for the four different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB,jnrC,jnrD; - int jnrlistA,jnrlistB,jnrlistC,jnrlistD; - int jnrlistE,jnrlistF,jnrlistG,jnrlistH; - int j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - real *fjptrA,*fjptrB,*fjptrC,*fjptrD; - real scratch[4*DIM]; - __m256d tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - real * vdwioffsetptr0; - __m256d ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D; - __m256d jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m256d dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m256d velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m256d vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp; - __m256d minushalf = _mm256_set1_pd(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - __m256d rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - __m256d one_sixth = _mm256_set1_pd(1.0/6.0); - __m256d one_twelfth = _mm256_set1_pd(1.0/12.0); - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m256d 
rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m256d dummy_mask,cutoff_mask; - __m128 tmpmask0,tmpmask1; - __m256d signbit = _mm256_castsi256_pd( _mm256_set1_epi32(0x80000000) ); - __m256d one = _mm256_set1_pd(1.0); - __m256d two = _mm256_set1_pd(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm256_set1_pd(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = _mm256_set1_pd(kernel_data->table_vdw->scale); - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm256_set1_pd(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm256_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = jnrC = jnrD = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - j_coord_offsetC = 0; - j_coord_offsetD = 0; - - outeriter = 0; - inneriter = 0; - - for(iidx=0;iidx<4*DIM;iidx++) - { - scratch[iidx] = 0.0; - } - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx=0; jidx+=4) - { - - /* Get j neighbor index, and coordinate index */ - jnrA = jjnr[jidx]; - jnrB = jjnr[jidx+1]; - jnrC = jjnr[jidx+2]; - jnrD = jjnr[jidx+3]; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm256_load_1rvec_4ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm256_sub_pd(ix0,jx0); - dy00 = _mm256_sub_pd(iy0,jy0); - dz00 = _mm256_sub_pd(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm256_calc_rsq_pd(dx00,dy00,dz00); - - rinv00 = 
avx256_invsqrt_d(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm256_load_4real_swizzle_pd(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm256_load_4real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm256_mul_pd(rsq00,rinv00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm256_mul_pd(iq0,jq0); - gmx_mm256_load_4pair_swizzle_pd(vdwioffsetptr0+vdwjidx0A, - vdwioffsetptr0+vdwjidx0B, - vdwioffsetptr0+vdwjidx0C, - vdwioffsetptr0+vdwjidx0D, - &c6_00,&c12_00); - - /* Calculate table index by multiplying r with table scale and truncate to integer */ - rt = _mm256_mul_pd(r00,vftabscale); - vfitab = _mm256_cvttpd_epi32(rt); - vfeps = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR)); - vfitab = _mm_slli_epi32(vfitab,3); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm256_mul_pd(isai0,isaj0); - gbqqfactor = _mm256_xor_pd(signbit,_mm256_mul_pd(qq00,_mm256_mul_pd(isaprod,gbinvepsdiff))); - gbscale = _mm256_mul_pd(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. 
- */ - rt = _mm256_mul_pd(r00,gbscale); - gbitab = _mm256_cvttpd_epi32(rt); - gbeps = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR)); - gbitab = _mm_slli_epi32(gbitab,2); - Y = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,0) ); - F = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,1) ); - G = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,2) ); - H = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,3) ); - GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H); - Heps = _mm256_mul_pd(gbeps,H); - Fp = _mm256_add_pd(F,_mm256_mul_pd(gbeps,_mm256_add_pd(G,Heps))); - VV = _mm256_add_pd(Y,_mm256_mul_pd(gbeps,Fp)); - vgb = _mm256_mul_pd(gbqqfactor,VV); - - FF = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps)))); - fgb = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale)); - dvdatmp = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r00))); - dvdasum = _mm256_add_pd(dvdasum,dvdatmp); - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - gmx_mm256_increment_4real_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD, - _mm256_mul_pd(dvdatmp,_mm256_mul_pd(isaj0,isaj0))); - velec = _mm256_mul_pd(qq00,rinv00); - felec = _mm256_mul_pd(_mm256_sub_pd(_mm256_mul_pd(velec,rinv00),fgb),rinv00); - - /* CUBIC SPLINE TABLE DISPERSION */ - Y = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,0) ); - F = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,1) ); - G = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,2) ); - H = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,3) ); - GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H); - Heps = _mm256_mul_pd(vfeps,H); - Fp = _mm256_add_pd(F,_mm256_mul_pd(vfeps,_mm256_add_pd(G,Heps))); - FF = _mm256_add_pd(Fp,_mm256_mul_pd(vfeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps)))); - fvdw6 = _mm256_mul_pd(c6_00,FF); - - /* CUBIC SPLINE TABLE REPULSION */ - vfitab = _mm_add_epi32(vfitab,ifour); - Y = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,0) ); - F = _mm256_load_pd( vftab + 
_mm_extract_epi32(vfitab,1) ); - G = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,2) ); - H = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,3) ); - GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H); - Heps = _mm256_mul_pd(vfeps,H); - Fp = _mm256_add_pd(F,_mm256_mul_pd(vfeps,_mm256_add_pd(G,Heps))); - FF = _mm256_add_pd(Fp,_mm256_mul_pd(vfeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps)))); - fvdw12 = _mm256_mul_pd(c12_00,FF); - fvdw = _mm256_xor_pd(signbit,_mm256_mul_pd(_mm256_add_pd(fvdw6,fvdw12),_mm256_mul_pd(vftabscale,rinv00))); - - fscal = _mm256_add_pd(felec,fvdw); - - /* Calculate temporary vectorial force */ - tx = _mm256_mul_pd(fscal,dx00); - ty = _mm256_mul_pd(fscal,dy00); - tz = _mm256_mul_pd(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm256_add_pd(fix0,tx); - fiy0 = _mm256_add_pd(fiy0,ty); - fiz0 = _mm256_add_pd(fiz0,tz); - - fjptrA = f+j_coord_offsetA; - fjptrB = f+j_coord_offsetB; - fjptrC = f+j_coord_offsetC; - fjptrD = f+j_coord_offsetD; - gmx_mm256_decrement_1rvec_4ptr_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 81 flops */ - } - - if(jidx=0) ? jnrlistA : 0; - jnrB = (jnrlistB>=0) ? jnrlistB : 0; - jnrC = (jnrlistC>=0) ? jnrlistC : 0; - jnrD = (jnrlistD>=0) ? 
jnrlistD : 0; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm256_load_1rvec_4ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm256_sub_pd(ix0,jx0); - dy00 = _mm256_sub_pd(iy0,jy0); - dz00 = _mm256_sub_pd(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm256_calc_rsq_pd(dx00,dy00,dz00); - - rinv00 = avx256_invsqrt_d(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm256_load_4real_swizzle_pd(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm256_load_4real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm256_mul_pd(rsq00,rinv00); - r00 = _mm256_andnot_pd(dummy_mask,r00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm256_mul_pd(iq0,jq0); - gmx_mm256_load_4pair_swizzle_pd(vdwioffsetptr0+vdwjidx0A, - vdwioffsetptr0+vdwjidx0B, - vdwioffsetptr0+vdwjidx0C, - vdwioffsetptr0+vdwjidx0D, - &c6_00,&c12_00); - - /* Calculate table index by multiplying r with table scale and truncate to integer */ - rt = _mm256_mul_pd(r00,vftabscale); - vfitab = _mm256_cvttpd_epi32(rt); - vfeps = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR)); - vfitab = _mm_slli_epi32(vfitab,3); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm256_mul_pd(isai0,isaj0); - gbqqfactor = _mm256_xor_pd(signbit,_mm256_mul_pd(qq00,_mm256_mul_pd(isaprod,gbinvepsdiff))); - gbscale = _mm256_mul_pd(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the 
normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm256_mul_pd(r00,gbscale); - gbitab = _mm256_cvttpd_epi32(rt); - gbeps = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR)); - gbitab = _mm_slli_epi32(gbitab,2); - Y = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,0) ); - F = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,1) ); - G = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,2) ); - H = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,3) ); - GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H); - Heps = _mm256_mul_pd(gbeps,H); - Fp = _mm256_add_pd(F,_mm256_mul_pd(gbeps,_mm256_add_pd(G,Heps))); - VV = _mm256_add_pd(Y,_mm256_mul_pd(gbeps,Fp)); - vgb = _mm256_mul_pd(gbqqfactor,VV); - - FF = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps)))); - fgb = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale)); - dvdatmp = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r00))); - dvdatmp = _mm256_andnot_pd(dummy_mask,dvdatmp); - dvdasum = _mm256_add_pd(dvdasum,dvdatmp); - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? 
dvda+jnrD : scratch; - gmx_mm256_increment_4real_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD, - _mm256_mul_pd(dvdatmp,_mm256_mul_pd(isaj0,isaj0))); - velec = _mm256_mul_pd(qq00,rinv00); - felec = _mm256_mul_pd(_mm256_sub_pd(_mm256_mul_pd(velec,rinv00),fgb),rinv00); - - /* CUBIC SPLINE TABLE DISPERSION */ - Y = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,0) ); - F = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,1) ); - G = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,2) ); - H = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,3) ); - GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H); - Heps = _mm256_mul_pd(vfeps,H); - Fp = _mm256_add_pd(F,_mm256_mul_pd(vfeps,_mm256_add_pd(G,Heps))); - FF = _mm256_add_pd(Fp,_mm256_mul_pd(vfeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps)))); - fvdw6 = _mm256_mul_pd(c6_00,FF); - - /* CUBIC SPLINE TABLE REPULSION */ - vfitab = _mm_add_epi32(vfitab,ifour); - Y = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,0) ); - F = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,1) ); - G = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,2) ); - H = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,3) ); - GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H); - Heps = _mm256_mul_pd(vfeps,H); - Fp = _mm256_add_pd(F,_mm256_mul_pd(vfeps,_mm256_add_pd(G,Heps))); - FF = _mm256_add_pd(Fp,_mm256_mul_pd(vfeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps)))); - fvdw12 = _mm256_mul_pd(c12_00,FF); - fvdw = _mm256_xor_pd(signbit,_mm256_mul_pd(_mm256_add_pd(fvdw6,fvdw12),_mm256_mul_pd(vftabscale,rinv00))); - - fscal = _mm256_add_pd(felec,fvdw); - - fscal = _mm256_andnot_pd(dummy_mask,fscal); - - /* Calculate temporary vectorial force */ - tx = _mm256_mul_pd(fscal,dx00); - ty = _mm256_mul_pd(fscal,dy00); - tz = _mm256_mul_pd(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm256_add_pd(fix0,tx); - fiy0 = _mm256_add_pd(fiy0,ty); - fiz0 = _mm256_add_pd(fiz0,tz); - - fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch; - fjptrB = (jnrlistB>=0) ? 
f+j_coord_offsetB : scratch; - fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch; - fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch; - gmx_mm256_decrement_1rvec_4ptr_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 82 flops */ - } - - /* End of innermost loop */ - - gmx_mm256_update_iforce_1atom_swizzle_pd(fix0,fiy0,fiz0, - f+i_coord_offset,fshift+i_shift_offset); - - dvdasum = _mm256_mul_pd(dvdasum, _mm256_mul_pd(isai0,isai0)); - gmx_mm256_update_1pot_pd(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 7 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*82); -} diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_256_double.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_256_double.c deleted file mode 100644 index 132b4e69b3..0000000000 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_256_double.c +++ /dev/null @@ -1,870 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. 
- * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/* - * Note: this file was generated by the GROMACS avx_256_double kernel generator. 
- */ -#include "gmxpre.h" - -#include "config.h" - -#include - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_x86_avx_256_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_256_double - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_256_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B,C,D refer to j loop unrolling done with AVX, e.g. for the four different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB,jnrC,jnrD; - int jnrlistA,jnrlistB,jnrlistC,jnrlistD; - int jnrlistE,jnrlistF,jnrlistG,jnrlistH; - int j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - real *fjptrA,*fjptrB,*fjptrC,*fjptrD; - real scratch[4*DIM]; - __m256d tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - real * vdwioffsetptr0; - __m256d ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D; - __m256d jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m256d dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m256d velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m256d vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp; - __m256d minushalf = _mm256_set1_pd(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - __m256d rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - __m256d one_sixth = _mm256_set1_pd(1.0/6.0); - __m256d one_twelfth = _mm256_set1_pd(1.0/12.0); - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m256d rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m256d dummy_mask,cutoff_mask; - __m128 tmpmask0,tmpmask1; - __m256d signbit = _mm256_castsi256_pd( _mm256_set1_epi32(0x80000000) ); - __m256d one = _mm256_set1_pd(1.0); - __m256d two = _mm256_set1_pd(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm256_set1_pd(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - 
gbtabscale = _mm256_set1_pd(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm256_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = jnrC = jnrD = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - j_coord_offsetC = 0; - j_coord_offsetD = 0; - - outeriter = 0; - inneriter = 0; - - for(iidx=0;iidx<4*DIM;iidx++) - { - scratch[iidx] = 0.0; - } - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx=0; jidx+=4) - { - - /* Get j neighbor index, and coordinate index */ - jnrA = jjnr[jidx]; - jnrB = jjnr[jidx+1]; - jnrC = jjnr[jidx+2]; - jnrD = jjnr[jidx+3]; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm256_load_1rvec_4ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm256_sub_pd(ix0,jx0); - dy00 = _mm256_sub_pd(iy0,jy0); - dz00 = _mm256_sub_pd(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm256_calc_rsq_pd(dx00,dy00,dz00); - - rinv00 = avx256_invsqrt_d(rsq00); - - rinvsq00 = _mm256_mul_pd(rinv00,rinv00); - - /* Load parameters for j particles */ - jq0 = gmx_mm256_load_4real_swizzle_pd(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm256_load_4real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm256_mul_pd(rsq00,rinv00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm256_mul_pd(iq0,jq0); - gmx_mm256_load_4pair_swizzle_pd(vdwioffsetptr0+vdwjidx0A, - vdwioffsetptr0+vdwjidx0B, - 
vdwioffsetptr0+vdwjidx0C, - vdwioffsetptr0+vdwjidx0D, - &c6_00,&c12_00); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm256_mul_pd(isai0,isaj0); - gbqqfactor = _mm256_xor_pd(signbit,_mm256_mul_pd(qq00,_mm256_mul_pd(isaprod,gbinvepsdiff))); - gbscale = _mm256_mul_pd(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm256_mul_pd(r00,gbscale); - gbitab = _mm256_cvttpd_epi32(rt); - gbeps = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR)); - gbitab = _mm_slli_epi32(gbitab,2); - Y = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,0) ); - F = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,1) ); - G = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,2) ); - H = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,3) ); - GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H); - Heps = _mm256_mul_pd(gbeps,H); - Fp = _mm256_add_pd(F,_mm256_mul_pd(gbeps,_mm256_add_pd(G,Heps))); - VV = _mm256_add_pd(Y,_mm256_mul_pd(gbeps,Fp)); - vgb = _mm256_mul_pd(gbqqfactor,VV); - - FF = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps)))); - fgb = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale)); - dvdatmp = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r00))); - dvdasum = _mm256_add_pd(dvdasum,dvdatmp); - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - gmx_mm256_increment_4real_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD, - _mm256_mul_pd(dvdatmp,_mm256_mul_pd(isaj0,isaj0))); - velec = _mm256_mul_pd(qq00,rinv00); - felec = _mm256_mul_pd(_mm256_sub_pd(_mm256_mul_pd(velec,rinv00),fgb),rinv00); - - /* LENNARD-JONES DISPERSION/REPULSION */ - - rinvsix = _mm256_mul_pd(_mm256_mul_pd(rinvsq00,rinvsq00),rinvsq00); - vvdw6 = _mm256_mul_pd(c6_00,rinvsix); - vvdw12 = _mm256_mul_pd(c12_00,_mm256_mul_pd(rinvsix,rinvsix)); - vvdw = _mm256_sub_pd( 
_mm256_mul_pd(vvdw12,one_twelfth) , _mm256_mul_pd(vvdw6,one_sixth) ); - fvdw = _mm256_mul_pd(_mm256_sub_pd(vvdw12,vvdw6),rinvsq00); - - /* Update potential sum for this i atom from the interaction with this j atom. */ - velecsum = _mm256_add_pd(velecsum,velec); - vgbsum = _mm256_add_pd(vgbsum,vgb); - vvdwsum = _mm256_add_pd(vvdwsum,vvdw); - - fscal = _mm256_add_pd(felec,fvdw); - - /* Calculate temporary vectorial force */ - tx = _mm256_mul_pd(fscal,dx00); - ty = _mm256_mul_pd(fscal,dy00); - tz = _mm256_mul_pd(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm256_add_pd(fix0,tx); - fiy0 = _mm256_add_pd(fiy0,ty); - fiz0 = _mm256_add_pd(fiz0,tz); - - fjptrA = f+j_coord_offsetA; - fjptrB = f+j_coord_offsetB; - fjptrC = f+j_coord_offsetC; - fjptrD = f+j_coord_offsetD; - gmx_mm256_decrement_1rvec_4ptr_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 70 flops */ - } - - if(jidx=0) ? jnrlistA : 0; - jnrB = (jnrlistB>=0) ? jnrlistB : 0; - jnrC = (jnrlistC>=0) ? jnrlistC : 0; - jnrD = (jnrlistD>=0) ? 
jnrlistD : 0; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm256_load_1rvec_4ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm256_sub_pd(ix0,jx0); - dy00 = _mm256_sub_pd(iy0,jy0); - dz00 = _mm256_sub_pd(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm256_calc_rsq_pd(dx00,dy00,dz00); - - rinv00 = avx256_invsqrt_d(rsq00); - - rinvsq00 = _mm256_mul_pd(rinv00,rinv00); - - /* Load parameters for j particles */ - jq0 = gmx_mm256_load_4real_swizzle_pd(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm256_load_4real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm256_mul_pd(rsq00,rinv00); - r00 = _mm256_andnot_pd(dummy_mask,r00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm256_mul_pd(iq0,jq0); - gmx_mm256_load_4pair_swizzle_pd(vdwioffsetptr0+vdwjidx0A, - vdwioffsetptr0+vdwjidx0B, - vdwioffsetptr0+vdwjidx0C, - vdwioffsetptr0+vdwjidx0D, - &c6_00,&c12_00); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm256_mul_pd(isai0,isaj0); - gbqqfactor = _mm256_xor_pd(signbit,_mm256_mul_pd(qq00,_mm256_mul_pd(isaprod,gbinvepsdiff))); - gbscale = _mm256_mul_pd(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. 
- */ - rt = _mm256_mul_pd(r00,gbscale); - gbitab = _mm256_cvttpd_epi32(rt); - gbeps = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR)); - gbitab = _mm_slli_epi32(gbitab,2); - Y = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,0) ); - F = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,1) ); - G = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,2) ); - H = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,3) ); - GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H); - Heps = _mm256_mul_pd(gbeps,H); - Fp = _mm256_add_pd(F,_mm256_mul_pd(gbeps,_mm256_add_pd(G,Heps))); - VV = _mm256_add_pd(Y,_mm256_mul_pd(gbeps,Fp)); - vgb = _mm256_mul_pd(gbqqfactor,VV); - - FF = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps)))); - fgb = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale)); - dvdatmp = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r00))); - dvdatmp = _mm256_andnot_pd(dummy_mask,dvdatmp); - dvdasum = _mm256_add_pd(dvdasum,dvdatmp); - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? 
dvda+jnrD : scratch; - gmx_mm256_increment_4real_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD, - _mm256_mul_pd(dvdatmp,_mm256_mul_pd(isaj0,isaj0))); - velec = _mm256_mul_pd(qq00,rinv00); - felec = _mm256_mul_pd(_mm256_sub_pd(_mm256_mul_pd(velec,rinv00),fgb),rinv00); - - /* LENNARD-JONES DISPERSION/REPULSION */ - - rinvsix = _mm256_mul_pd(_mm256_mul_pd(rinvsq00,rinvsq00),rinvsq00); - vvdw6 = _mm256_mul_pd(c6_00,rinvsix); - vvdw12 = _mm256_mul_pd(c12_00,_mm256_mul_pd(rinvsix,rinvsix)); - vvdw = _mm256_sub_pd( _mm256_mul_pd(vvdw12,one_twelfth) , _mm256_mul_pd(vvdw6,one_sixth) ); - fvdw = _mm256_mul_pd(_mm256_sub_pd(vvdw12,vvdw6),rinvsq00); - - /* Update potential sum for this i atom from the interaction with this j atom. */ - velec = _mm256_andnot_pd(dummy_mask,velec); - velecsum = _mm256_add_pd(velecsum,velec); - vgb = _mm256_andnot_pd(dummy_mask,vgb); - vgbsum = _mm256_add_pd(vgbsum,vgb); - vvdw = _mm256_andnot_pd(dummy_mask,vvdw); - vvdwsum = _mm256_add_pd(vvdwsum,vvdw); - - fscal = _mm256_add_pd(felec,fvdw); - - fscal = _mm256_andnot_pd(dummy_mask,fscal); - - /* Calculate temporary vectorial force */ - tx = _mm256_mul_pd(fscal,dx00); - ty = _mm256_mul_pd(fscal,dy00); - tz = _mm256_mul_pd(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm256_add_pd(fix0,tx); - fiy0 = _mm256_add_pd(fiy0,ty); - fiz0 = _mm256_add_pd(fiz0,tz); - - fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch; - fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch; - fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch; - fjptrD = (jnrlistD>=0) ? 
f+j_coord_offsetD : scratch; - gmx_mm256_decrement_1rvec_4ptr_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 71 flops */ - } - - /* End of innermost loop */ - - gmx_mm256_update_iforce_1atom_swizzle_pd(fix0,fiy0,fiz0, - f+i_coord_offset,fshift+i_shift_offset); - - ggid = gid[iidx]; - /* Update potential energies */ - gmx_mm256_update_1pot_pd(velecsum,kernel_data->energygrp_elec+ggid); - gmx_mm256_update_1pot_pd(vgbsum,kernel_data->energygrp_polarization+ggid); - gmx_mm256_update_1pot_pd(vvdwsum,kernel_data->energygrp_vdw+ggid); - dvdasum = _mm256_mul_pd(dvdasum, _mm256_mul_pd(isai0,isai0)); - gmx_mm256_update_1pot_pd(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 10 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*71); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_256_double - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_256_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B,C,D refer to j loop unrolling done with AVX, e.g. for the four different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB,jnrC,jnrD; - int jnrlistA,jnrlistB,jnrlistC,jnrlistD; - int jnrlistE,jnrlistF,jnrlistG,jnrlistH; - int j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - real *fjptrA,*fjptrB,*fjptrC,*fjptrD; - real scratch[4*DIM]; - __m256d tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - real * vdwioffsetptr0; - __m256d ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D; - __m256d jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m256d dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m256d velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m256d vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp; - __m256d minushalf = _mm256_set1_pd(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - __m256d rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - __m256d one_sixth = _mm256_set1_pd(1.0/6.0); - __m256d one_twelfth = _mm256_set1_pd(1.0/12.0); - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m256d rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m256d dummy_mask,cutoff_mask; - __m128 tmpmask0,tmpmask1; - __m256d signbit = _mm256_castsi256_pd( _mm256_set1_epi32(0x80000000) ); - __m256d one = _mm256_set1_pd(1.0); - __m256d two = _mm256_set1_pd(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm256_set1_pd(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - 
gbtabscale = _mm256_set1_pd(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm256_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = jnrC = jnrD = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - j_coord_offsetC = 0; - j_coord_offsetD = 0; - - outeriter = 0; - inneriter = 0; - - for(iidx=0;iidx<4*DIM;iidx++) - { - scratch[iidx] = 0.0; - } - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx=0; jidx+=4) - { - - /* Get j neighbor index, and coordinate index */ - jnrA = jjnr[jidx]; - jnrB = jjnr[jidx+1]; - jnrC = jjnr[jidx+2]; - jnrD = jjnr[jidx+3]; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm256_load_1rvec_4ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm256_sub_pd(ix0,jx0); - dy00 = _mm256_sub_pd(iy0,jy0); - dz00 = _mm256_sub_pd(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm256_calc_rsq_pd(dx00,dy00,dz00); - - rinv00 = avx256_invsqrt_d(rsq00); - - rinvsq00 = _mm256_mul_pd(rinv00,rinv00); - - /* Load parameters for j particles */ - jq0 = gmx_mm256_load_4real_swizzle_pd(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm256_load_4real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm256_mul_pd(rsq00,rinv00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm256_mul_pd(iq0,jq0); - gmx_mm256_load_4pair_swizzle_pd(vdwioffsetptr0+vdwjidx0A, - vdwioffsetptr0+vdwjidx0B, - 
vdwioffsetptr0+vdwjidx0C, - vdwioffsetptr0+vdwjidx0D, - &c6_00,&c12_00); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm256_mul_pd(isai0,isaj0); - gbqqfactor = _mm256_xor_pd(signbit,_mm256_mul_pd(qq00,_mm256_mul_pd(isaprod,gbinvepsdiff))); - gbscale = _mm256_mul_pd(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm256_mul_pd(r00,gbscale); - gbitab = _mm256_cvttpd_epi32(rt); - gbeps = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR)); - gbitab = _mm_slli_epi32(gbitab,2); - Y = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,0) ); - F = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,1) ); - G = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,2) ); - H = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,3) ); - GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H); - Heps = _mm256_mul_pd(gbeps,H); - Fp = _mm256_add_pd(F,_mm256_mul_pd(gbeps,_mm256_add_pd(G,Heps))); - VV = _mm256_add_pd(Y,_mm256_mul_pd(gbeps,Fp)); - vgb = _mm256_mul_pd(gbqqfactor,VV); - - FF = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps)))); - fgb = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale)); - dvdatmp = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r00))); - dvdasum = _mm256_add_pd(dvdasum,dvdatmp); - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - gmx_mm256_increment_4real_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD, - _mm256_mul_pd(dvdatmp,_mm256_mul_pd(isaj0,isaj0))); - velec = _mm256_mul_pd(qq00,rinv00); - felec = _mm256_mul_pd(_mm256_sub_pd(_mm256_mul_pd(velec,rinv00),fgb),rinv00); - - /* LENNARD-JONES DISPERSION/REPULSION */ - - rinvsix = _mm256_mul_pd(_mm256_mul_pd(rinvsq00,rinvsq00),rinvsq00); - fvdw = _mm256_mul_pd(_mm256_sub_pd(_mm256_mul_pd(c12_00,rinvsix),c6_00),_mm256_mul_pd(rinvsix,rinvsq00)); - - fscal = 
_mm256_add_pd(felec,fvdw); - - /* Calculate temporary vectorial force */ - tx = _mm256_mul_pd(fscal,dx00); - ty = _mm256_mul_pd(fscal,dy00); - tz = _mm256_mul_pd(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm256_add_pd(fix0,tx); - fiy0 = _mm256_add_pd(fiy0,ty); - fiz0 = _mm256_add_pd(fiz0,tz); - - fjptrA = f+j_coord_offsetA; - fjptrB = f+j_coord_offsetB; - fjptrC = f+j_coord_offsetC; - fjptrD = f+j_coord_offsetD; - gmx_mm256_decrement_1rvec_4ptr_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 63 flops */ - } - - if(jidx=0) ? jnrlistA : 0; - jnrB = (jnrlistB>=0) ? jnrlistB : 0; - jnrC = (jnrlistC>=0) ? jnrlistC : 0; - jnrD = (jnrlistD>=0) ? jnrlistD : 0; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm256_load_1rvec_4ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm256_sub_pd(ix0,jx0); - dy00 = _mm256_sub_pd(iy0,jy0); - dz00 = _mm256_sub_pd(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm256_calc_rsq_pd(dx00,dy00,dz00); - - rinv00 = avx256_invsqrt_d(rsq00); - - rinvsq00 = _mm256_mul_pd(rinv00,rinv00); - - /* Load parameters for j particles */ - jq0 = gmx_mm256_load_4real_swizzle_pd(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm256_load_4real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm256_mul_pd(rsq00,rinv00); - r00 = _mm256_andnot_pd(dummy_mask,r00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm256_mul_pd(iq0,jq0); - 
gmx_mm256_load_4pair_swizzle_pd(vdwioffsetptr0+vdwjidx0A, - vdwioffsetptr0+vdwjidx0B, - vdwioffsetptr0+vdwjidx0C, - vdwioffsetptr0+vdwjidx0D, - &c6_00,&c12_00); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm256_mul_pd(isai0,isaj0); - gbqqfactor = _mm256_xor_pd(signbit,_mm256_mul_pd(qq00,_mm256_mul_pd(isaprod,gbinvepsdiff))); - gbscale = _mm256_mul_pd(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm256_mul_pd(r00,gbscale); - gbitab = _mm256_cvttpd_epi32(rt); - gbeps = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR)); - gbitab = _mm_slli_epi32(gbitab,2); - Y = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,0) ); - F = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,1) ); - G = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,2) ); - H = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,3) ); - GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H); - Heps = _mm256_mul_pd(gbeps,H); - Fp = _mm256_add_pd(F,_mm256_mul_pd(gbeps,_mm256_add_pd(G,Heps))); - VV = _mm256_add_pd(Y,_mm256_mul_pd(gbeps,Fp)); - vgb = _mm256_mul_pd(gbqqfactor,VV); - - FF = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps)))); - fgb = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale)); - dvdatmp = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r00))); - dvdatmp = _mm256_andnot_pd(dummy_mask,dvdatmp); - dvdasum = _mm256_add_pd(dvdasum,dvdatmp); - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? 
dvda+jnrD : scratch; - gmx_mm256_increment_4real_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD, - _mm256_mul_pd(dvdatmp,_mm256_mul_pd(isaj0,isaj0))); - velec = _mm256_mul_pd(qq00,rinv00); - felec = _mm256_mul_pd(_mm256_sub_pd(_mm256_mul_pd(velec,rinv00),fgb),rinv00); - - /* LENNARD-JONES DISPERSION/REPULSION */ - - rinvsix = _mm256_mul_pd(_mm256_mul_pd(rinvsq00,rinvsq00),rinvsq00); - fvdw = _mm256_mul_pd(_mm256_sub_pd(_mm256_mul_pd(c12_00,rinvsix),c6_00),_mm256_mul_pd(rinvsix,rinvsq00)); - - fscal = _mm256_add_pd(felec,fvdw); - - fscal = _mm256_andnot_pd(dummy_mask,fscal); - - /* Calculate temporary vectorial force */ - tx = _mm256_mul_pd(fscal,dx00); - ty = _mm256_mul_pd(fscal,dy00); - tz = _mm256_mul_pd(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm256_add_pd(fix0,tx); - fiy0 = _mm256_add_pd(fiy0,ty); - fiz0 = _mm256_add_pd(fiz0,tz); - - fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch; - fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch; - fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch; - fjptrD = (jnrlistD>=0) ? 
f+j_coord_offsetD : scratch; - gmx_mm256_decrement_1rvec_4ptr_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 64 flops */ - } - - /* End of innermost loop */ - - gmx_mm256_update_iforce_1atom_swizzle_pd(fix0,fiy0,fiz0, - f+i_coord_offset,fshift+i_shift_offset); - - dvdasum = _mm256_mul_pd(dvdasum, _mm256_mul_pd(isai0,isai0)); - gmx_mm256_update_1pot_pd(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 7 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*64); -} diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_256_double.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_256_double.c deleted file mode 100644 index a64785a563..0000000000 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_256_double.c +++ /dev/null @@ -1,775 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/* - * Note: this file was generated by the GROMACS avx_256_double kernel generator. - */ -#include "gmxpre.h" - -#include "config.h" - -#include - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_x86_avx_256_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_256_double - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_256_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B,C,D refer to j loop unrolling done with AVX, e.g. 
for the four different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB,jnrC,jnrD; - int jnrlistA,jnrlistB,jnrlistC,jnrlistD; - int jnrlistE,jnrlistF,jnrlistG,jnrlistH; - int j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - real *fjptrA,*fjptrB,*fjptrC,*fjptrD; - real scratch[4*DIM]; - __m256d tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - real * vdwioffsetptr0; - __m256d ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D; - __m256d jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m256d dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m256d velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m256d vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp; - __m256d minushalf = _mm256_set1_pd(-0.5); - real *invsqrta,*dvda,*gbtab; - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m256d rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m256d dummy_mask,cutoff_mask; - __m128 tmpmask0,tmpmask1; - __m256d signbit = _mm256_castsi256_pd( _mm256_set1_epi32(0x80000000) ); - __m256d one = _mm256_set1_pd(1.0); - __m256d two = _mm256_set1_pd(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm256_set1_pd(fr->ic->epsfac); - charge = mdatoms->chargeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm256_set1_pd(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm256_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ 
- jnrA = jnrB = jnrC = jnrD = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - j_coord_offsetC = 0; - j_coord_offsetD = 0; - - outeriter = 0; - inneriter = 0; - - for(iidx=0;iidx<4*DIM;iidx++) - { - scratch[iidx] = 0.0; - } - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx=0; jidx+=4) - { - - /* Get j neighbor index, and coordinate index */ - jnrA = jjnr[jidx]; - jnrB = jjnr[jidx+1]; - jnrC = jjnr[jidx+2]; - jnrD = jjnr[jidx+3]; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm256_load_1rvec_4ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm256_sub_pd(ix0,jx0); - dy00 = _mm256_sub_pd(iy0,jy0); - dz00 = _mm256_sub_pd(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm256_calc_rsq_pd(dx00,dy00,dz00); - - rinv00 = avx256_invsqrt_d(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm256_load_4real_swizzle_pd(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm256_load_4real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm256_mul_pd(rsq00,rinv00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm256_mul_pd(iq0,jq0); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm256_mul_pd(isai0,isaj0); - gbqqfactor = _mm256_xor_pd(signbit,_mm256_mul_pd(qq00,_mm256_mul_pd(isaprod,gbinvepsdiff))); - gbscale = _mm256_mul_pd(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. 
- */ - rt = _mm256_mul_pd(r00,gbscale); - gbitab = _mm256_cvttpd_epi32(rt); - gbeps = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR)); - gbitab = _mm_slli_epi32(gbitab,2); - Y = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,0) ); - F = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,1) ); - G = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,2) ); - H = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,3) ); - GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H); - Heps = _mm256_mul_pd(gbeps,H); - Fp = _mm256_add_pd(F,_mm256_mul_pd(gbeps,_mm256_add_pd(G,Heps))); - VV = _mm256_add_pd(Y,_mm256_mul_pd(gbeps,Fp)); - vgb = _mm256_mul_pd(gbqqfactor,VV); - - FF = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps)))); - fgb = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale)); - dvdatmp = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r00))); - dvdasum = _mm256_add_pd(dvdasum,dvdatmp); - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - gmx_mm256_increment_4real_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD, - _mm256_mul_pd(dvdatmp,_mm256_mul_pd(isaj0,isaj0))); - velec = _mm256_mul_pd(qq00,rinv00); - felec = _mm256_mul_pd(_mm256_sub_pd(_mm256_mul_pd(velec,rinv00),fgb),rinv00); - - /* Update potential sum for this i atom from the interaction with this j atom. */ - velecsum = _mm256_add_pd(velecsum,velec); - vgbsum = _mm256_add_pd(vgbsum,vgb); - - fscal = felec; - - /* Calculate temporary vectorial force */ - tx = _mm256_mul_pd(fscal,dx00); - ty = _mm256_mul_pd(fscal,dy00); - tz = _mm256_mul_pd(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm256_add_pd(fix0,tx); - fiy0 = _mm256_add_pd(fiy0,ty); - fiz0 = _mm256_add_pd(fiz0,tz); - - fjptrA = f+j_coord_offsetA; - fjptrB = f+j_coord_offsetB; - fjptrC = f+j_coord_offsetC; - fjptrD = f+j_coord_offsetD; - gmx_mm256_decrement_1rvec_4ptr_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 57 flops */ - } - - if(jidx=0) ? 
jnrlistA : 0; - jnrB = (jnrlistB>=0) ? jnrlistB : 0; - jnrC = (jnrlistC>=0) ? jnrlistC : 0; - jnrD = (jnrlistD>=0) ? jnrlistD : 0; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm256_load_1rvec_4ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm256_sub_pd(ix0,jx0); - dy00 = _mm256_sub_pd(iy0,jy0); - dz00 = _mm256_sub_pd(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm256_calc_rsq_pd(dx00,dy00,dz00); - - rinv00 = avx256_invsqrt_d(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm256_load_4real_swizzle_pd(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm256_load_4real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm256_mul_pd(rsq00,rinv00); - r00 = _mm256_andnot_pd(dummy_mask,r00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm256_mul_pd(iq0,jq0); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm256_mul_pd(isai0,isaj0); - gbqqfactor = _mm256_xor_pd(signbit,_mm256_mul_pd(qq00,_mm256_mul_pd(isaprod,gbinvepsdiff))); - gbscale = _mm256_mul_pd(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. 
- */ - rt = _mm256_mul_pd(r00,gbscale); - gbitab = _mm256_cvttpd_epi32(rt); - gbeps = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR)); - gbitab = _mm_slli_epi32(gbitab,2); - Y = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,0) ); - F = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,1) ); - G = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,2) ); - H = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,3) ); - GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H); - Heps = _mm256_mul_pd(gbeps,H); - Fp = _mm256_add_pd(F,_mm256_mul_pd(gbeps,_mm256_add_pd(G,Heps))); - VV = _mm256_add_pd(Y,_mm256_mul_pd(gbeps,Fp)); - vgb = _mm256_mul_pd(gbqqfactor,VV); - - FF = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps)))); - fgb = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale)); - dvdatmp = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r00))); - dvdatmp = _mm256_andnot_pd(dummy_mask,dvdatmp); - dvdasum = _mm256_add_pd(dvdasum,dvdatmp); - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? dvda+jnrD : scratch; - gmx_mm256_increment_4real_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD, - _mm256_mul_pd(dvdatmp,_mm256_mul_pd(isaj0,isaj0))); - velec = _mm256_mul_pd(qq00,rinv00); - felec = _mm256_mul_pd(_mm256_sub_pd(_mm256_mul_pd(velec,rinv00),fgb),rinv00); - - /* Update potential sum for this i atom from the interaction with this j atom. 
*/ - velec = _mm256_andnot_pd(dummy_mask,velec); - velecsum = _mm256_add_pd(velecsum,velec); - vgb = _mm256_andnot_pd(dummy_mask,vgb); - vgbsum = _mm256_add_pd(vgbsum,vgb); - - fscal = felec; - - fscal = _mm256_andnot_pd(dummy_mask,fscal); - - /* Calculate temporary vectorial force */ - tx = _mm256_mul_pd(fscal,dx00); - ty = _mm256_mul_pd(fscal,dy00); - tz = _mm256_mul_pd(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm256_add_pd(fix0,tx); - fiy0 = _mm256_add_pd(fiy0,ty); - fiz0 = _mm256_add_pd(fiz0,tz); - - fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch; - fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch; - fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch; - fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch; - gmx_mm256_decrement_1rvec_4ptr_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 58 flops */ - } - - /* End of innermost loop */ - - gmx_mm256_update_iforce_1atom_swizzle_pd(fix0,fiy0,fiz0, - f+i_coord_offset,fshift+i_shift_offset); - - ggid = gid[iidx]; - /* Update potential energies */ - gmx_mm256_update_1pot_pd(velecsum,kernel_data->energygrp_elec+ggid); - gmx_mm256_update_1pot_pd(vgbsum,kernel_data->energygrp_polarization+ggid); - dvdasum = _mm256_mul_pd(dvdasum, _mm256_mul_pd(isai0,isai0)); - gmx_mm256_update_1pot_pd(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 9 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*9 + inneriter*58); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_256_double - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_256_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct 
t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B,C,D refer to j loop unrolling done with AVX, e.g. for the four different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB,jnrC,jnrD; - int jnrlistA,jnrlistB,jnrlistC,jnrlistD; - int jnrlistE,jnrlistF,jnrlistG,jnrlistH; - int j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - real *fjptrA,*fjptrB,*fjptrC,*fjptrD; - real scratch[4*DIM]; - __m256d tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - real * vdwioffsetptr0; - __m256d ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D; - __m256d jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m256d dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m256d velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m256d vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp; - __m256d minushalf = _mm256_set1_pd(-0.5); - real *invsqrta,*dvda,*gbtab; - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m256d rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m256d dummy_mask,cutoff_mask; - __m128 tmpmask0,tmpmask1; - __m256d signbit = _mm256_castsi256_pd( _mm256_set1_epi32(0x80000000) ); - __m256d one = _mm256_set1_pd(1.0); - __m256d two = _mm256_set1_pd(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = 
fr->fshift[0]; - facel = _mm256_set1_pd(fr->ic->epsfac); - charge = mdatoms->chargeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm256_set1_pd(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm256_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = jnrC = jnrD = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - j_coord_offsetC = 0; - j_coord_offsetD = 0; - - outeriter = 0; - inneriter = 0; - - for(iidx=0;iidx<4*DIM;iidx++) - { - scratch[iidx] = 0.0; - } - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx=0; jidx+=4) - { - - /* Get j neighbor index, and coordinate index */ - jnrA = jjnr[jidx]; - jnrB = jjnr[jidx+1]; - jnrC = jjnr[jidx+2]; - jnrD = jjnr[jidx+3]; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm256_load_1rvec_4ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm256_sub_pd(ix0,jx0); - dy00 = _mm256_sub_pd(iy0,jy0); - dz00 = _mm256_sub_pd(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm256_calc_rsq_pd(dx00,dy00,dz00); - - rinv00 = avx256_invsqrt_d(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm256_load_4real_swizzle_pd(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm256_load_4real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm256_mul_pd(rsq00,rinv00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm256_mul_pd(iq0,jq0); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm256_mul_pd(isai0,isaj0); - gbqqfactor = 
_mm256_xor_pd(signbit,_mm256_mul_pd(qq00,_mm256_mul_pd(isaprod,gbinvepsdiff))); - gbscale = _mm256_mul_pd(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm256_mul_pd(r00,gbscale); - gbitab = _mm256_cvttpd_epi32(rt); - gbeps = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR)); - gbitab = _mm_slli_epi32(gbitab,2); - Y = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,0) ); - F = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,1) ); - G = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,2) ); - H = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,3) ); - GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H); - Heps = _mm256_mul_pd(gbeps,H); - Fp = _mm256_add_pd(F,_mm256_mul_pd(gbeps,_mm256_add_pd(G,Heps))); - VV = _mm256_add_pd(Y,_mm256_mul_pd(gbeps,Fp)); - vgb = _mm256_mul_pd(gbqqfactor,VV); - - FF = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps)))); - fgb = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale)); - dvdatmp = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r00))); - dvdasum = _mm256_add_pd(dvdasum,dvdatmp); - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - gmx_mm256_increment_4real_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD, - _mm256_mul_pd(dvdatmp,_mm256_mul_pd(isaj0,isaj0))); - velec = _mm256_mul_pd(qq00,rinv00); - felec = _mm256_mul_pd(_mm256_sub_pd(_mm256_mul_pd(velec,rinv00),fgb),rinv00); - - fscal = felec; - - /* Calculate temporary vectorial force */ - tx = _mm256_mul_pd(fscal,dx00); - ty = _mm256_mul_pd(fscal,dy00); - tz = _mm256_mul_pd(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm256_add_pd(fix0,tx); - fiy0 = _mm256_add_pd(fiy0,ty); - fiz0 = _mm256_add_pd(fiz0,tz); - - fjptrA = f+j_coord_offsetA; - fjptrB = f+j_coord_offsetB; - fjptrC = f+j_coord_offsetC; - fjptrD = f+j_coord_offsetD; - 
gmx_mm256_decrement_1rvec_4ptr_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 55 flops */ - } - - if(jidx=0) ? jnrlistA : 0; - jnrB = (jnrlistB>=0) ? jnrlistB : 0; - jnrC = (jnrlistC>=0) ? jnrlistC : 0; - jnrD = (jnrlistD>=0) ? jnrlistD : 0; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm256_load_1rvec_4ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm256_sub_pd(ix0,jx0); - dy00 = _mm256_sub_pd(iy0,jy0); - dz00 = _mm256_sub_pd(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm256_calc_rsq_pd(dx00,dy00,dz00); - - rinv00 = avx256_invsqrt_d(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm256_load_4real_swizzle_pd(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm256_load_4real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm256_mul_pd(rsq00,rinv00); - r00 = _mm256_andnot_pd(dummy_mask,r00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm256_mul_pd(iq0,jq0); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm256_mul_pd(isai0,isaj0); - gbqqfactor = _mm256_xor_pd(signbit,_mm256_mul_pd(qq00,_mm256_mul_pd(isaprod,gbinvepsdiff))); - gbscale = _mm256_mul_pd(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. 
- */ - rt = _mm256_mul_pd(r00,gbscale); - gbitab = _mm256_cvttpd_epi32(rt); - gbeps = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR)); - gbitab = _mm_slli_epi32(gbitab,2); - Y = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,0) ); - F = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,1) ); - G = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,2) ); - H = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,3) ); - GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H); - Heps = _mm256_mul_pd(gbeps,H); - Fp = _mm256_add_pd(F,_mm256_mul_pd(gbeps,_mm256_add_pd(G,Heps))); - VV = _mm256_add_pd(Y,_mm256_mul_pd(gbeps,Fp)); - vgb = _mm256_mul_pd(gbqqfactor,VV); - - FF = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps)))); - fgb = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale)); - dvdatmp = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r00))); - dvdatmp = _mm256_andnot_pd(dummy_mask,dvdatmp); - dvdasum = _mm256_add_pd(dvdasum,dvdatmp); - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? dvda+jnrD : scratch; - gmx_mm256_increment_4real_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD, - _mm256_mul_pd(dvdatmp,_mm256_mul_pd(isaj0,isaj0))); - velec = _mm256_mul_pd(qq00,rinv00); - felec = _mm256_mul_pd(_mm256_sub_pd(_mm256_mul_pd(velec,rinv00),fgb),rinv00); - - fscal = felec; - - fscal = _mm256_andnot_pd(dummy_mask,fscal); - - /* Calculate temporary vectorial force */ - tx = _mm256_mul_pd(fscal,dx00); - ty = _mm256_mul_pd(fscal,dy00); - tz = _mm256_mul_pd(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm256_add_pd(fix0,tx); - fiy0 = _mm256_add_pd(fiy0,ty); - fiz0 = _mm256_add_pd(fiz0,tz); - - fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch; - fjptrB = (jnrlistB>=0) ? 
f+j_coord_offsetB : scratch; - fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch; - fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch; - gmx_mm256_decrement_1rvec_4ptr_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 56 flops */ - } - - /* End of innermost loop */ - - gmx_mm256_update_iforce_1atom_swizzle_pd(fix0,fiy0,fiz0, - f+i_coord_offset,fshift+i_shift_offset); - - dvdasum = _mm256_mul_pd(dvdasum, _mm256_mul_pd(isai0,isai0)); - gmx_mm256_update_1pot_pd(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 7 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*7 + inneriter*56); -} diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_avx_256_double.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_avx_256_double.c index 9265074e27..b695cfff25 100644 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_avx_256_double.c +++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_avx_256_double.c @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -51,6 +51,36 @@ nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_avx_256_double; nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_256_double; nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_256_double; nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_256_double; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_256_double; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_256_double; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_256_double; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_256_double; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_256_double; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_256_double; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_256_double; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_256_double; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_256_double; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_256_double; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_256_double; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_256_double; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_256_double; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_256_double; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_256_double; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_256_double; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_256_double; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_256_double; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_256_double; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_256_double; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_256_double; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_256_double; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_256_double; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_256_double; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_256_double; +nb_kernel_t 
nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_256_double; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_256_double; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_256_double; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_256_double; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_256_double; nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_avx_256_double; nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_avx_256_double; nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_avx_256_double; @@ -141,72 +171,6 @@ nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_avx_256_double; nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_256_double; nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_256_double; nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_256_double; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_256_double; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_256_double; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_256_double; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_256_double; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_256_double; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_256_double; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_256_double; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_256_double; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_256_double; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_256_double; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_256_double; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_256_double; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_256_double; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_256_double; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_256_double; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_256_double; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_256_double; -nb_kernel_t 
nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_256_double; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_256_double; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_256_double; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_256_double; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_256_double; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_256_double; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_256_double; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_256_double; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_256_double; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_256_double; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_256_double; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_256_double; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_256_double; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_256_double; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_256_double; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_256_double; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_256_double; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_256_double; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_256_double; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_256_double; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_256_double; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_256_double; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_256_double; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_256_double; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_256_double; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_256_double; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_256_double; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_256_double; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_256_double; -nb_kernel_t 
nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_256_double; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_256_double; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_256_double; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_256_double; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_256_double; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_256_double; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_256_double; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_256_double; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_256_double; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_256_double; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_256_double; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_256_double; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_256_double; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_256_double; -nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_256_double; -nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_256_double; -nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_256_double; -nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_256_double; -nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_256_double; -nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_256_double; nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_256_double; nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_256_double; nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_256_double; @@ -277,6 +241,36 @@ nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_256_double; nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_256_double; nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_256_double; nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_256_double; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_256_double; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_256_double; +nb_kernel_t 
nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_256_double; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_256_double; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_256_double; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_256_double; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_256_double; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_256_double; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_256_double; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_256_double; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_256_double; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_256_double; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_256_double; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_256_double; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_256_double; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_256_double; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_256_double; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_256_double; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_256_double; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_256_double; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_256_double; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_256_double; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_256_double; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_256_double; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_256_double; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_256_double; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_256_double; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_256_double; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_256_double; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_256_double; nb_kernel_info_t @@ -294,6 +288,36 @@ nb_kernel_info_t { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_256_double, 
"nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_256_double", "avx_256_double", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" }, { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_256_double", "avx_256_double", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_256_double, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_256_double", "avx_256_double", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_256_double, 
"nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" }, + { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_256_double, 
"nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" }, + { 
nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }, { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_avx_256_double", "avx_256_double", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "PotentialAndForce" }, { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_avx_256_double, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_avx_256_double", "avx_256_double", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "Force" }, { nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_avx_256_double", "avx_256_double", "Ewald", "None", "LJEwald", "None", "Water3Particle", "", "PotentialAndForce" }, @@ 
-384,72 +408,6 @@ nb_kernel_info_t { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_256_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_256_double", "avx_256_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "Force" }, { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_256_double", "avx_256_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "PotentialAndForce" }, { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_256_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_256_double", "avx_256_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_256_double, 
"nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", 
"Water3Water3", "", "Force" }, - { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" }, - { 
nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" }, - { 
nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" }, - { 
nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, - { 
nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_256_double", "avx_256_double", "GeneralizedBorn", "None", 
"LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_256_double, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_256_double", "avx_256_double", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_256_double", "avx_256_double", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_256_double, "nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_256_double", "avx_256_double", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_256_double", "avx_256_double", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_256_double, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_256_double", "avx_256_double", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" }, { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_256_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" }, { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" }, @@ -519,7 +477,37 @@ nb_kernel_info_t { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_256_double, 
"nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_256_double", "avx_256_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" }, { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_256_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_256_double", "avx_256_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" }, { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_256_double", "avx_256_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_256_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_256_double", "avx_256_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" } + { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_256_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_256_double", "avx_256_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }, + { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_256_double", 
"avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" }, + { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" }, + { 
nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_256_double, 
"nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" } }; int diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_template_avx_256_double.pre b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_template_avx_256_double.pre index a25ec0e390..f0197c104d 100644 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_template_avx_256_double.pre +++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_template_avx_256_double.pre @@ -2,7 +2,7 @@ /* * This file is part of the GROMACS molecular simulation package. 
* - * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -135,12 +135,6 @@ void __m256d velec,felec,velecsum,facel,crf,krf,krf2; real *charge; /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC */ - __m128i gbitab; - __m256d vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp; - __m256d minushalf = _mm256_set1_pd(-0.5); - real *invsqrta,*dvda,*gbtab; - /* #endif */ /* #if KERNEL_VDW != 'None' */ int nvdwtype; __m256d rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; @@ -149,7 +143,7 @@ void __m256d one_sixth = _mm256_set1_pd(1.0/6.0); __m256d one_twelfth = _mm256_set1_pd(1.0/12.0); /* #endif */ - /* #if 'Table' in KERNEL_ELEC or 'GeneralizedBorn' in KERNEL_ELEC or 'Table' in KERNEL_VDW */ + /* #if 'Table' in KERNEL_ELEC or 'Table' in KERNEL_VDW */ __m128i vfitab; __m128i ifour = _mm_set1_epi32(4); __m256d rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; @@ -239,14 +233,6 @@ void /* #endif */ /* #endif */ - /* #if KERNEL_ELEC=='GeneralizedBorn' */ - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm256_set1_pd(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm256_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - /* #endif */ - /* #if 'Water' in GEOMETRY_I */ /* Setup water-specific parameters */ inr = nlist->iinr[0]; @@ -383,9 +369,6 @@ void /* #for I in PARTICLES_ELEC_I */ iq{I} = _mm256_mul_pd(facel,_mm256_set1_pd(charge[inr+{I}])); /* #define OUTERFLOPS OUTERFLOPS+1 */ - /* #if KERNEL_ELEC=='GeneralizedBorn' */ - isai{I} = _mm256_set1_pd(invsqrta[inr+{I}]); - /* #endif */ /* #endfor */ /* #for I in PARTICLES_VDW_I */ vdwioffsetptr{I} = 
vdwparam+2*nvdwtype*vdwtype[inr+{I}]; @@ -400,16 +383,10 @@ void /* #if KERNEL_ELEC != 'None' */ velecsum = _mm256_setzero_pd(); /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC */ - vgbsum = _mm256_setzero_pd(); - /* #endif */ /* #if KERNEL_VDW != 'None' */ vvdwsum = _mm256_setzero_pd(); /* #endif */ /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */ - dvdasum = _mm256_setzero_pd(); - /* #endif */ /* #for ROUND in ['Loop','Epilogue'] */ @@ -519,10 +496,6 @@ void /* #for J in PARTICLES_ELEC_J */ jq{J} = gmx_mm256_load_4real_swizzle_pd(charge+jnrA+{J},charge+jnrB+{J}, charge+jnrC+{J},charge+jnrD+{J}); - /* #if KERNEL_ELEC=='GeneralizedBorn' */ - isaj{J} = gmx_mm256_load_4real_swizzle_pd(invsqrta+jnrA+{J},invsqrta+jnrB+{J}, - invsqrta+jnrC+{J},invsqrta+jnrD+{J}); - /* #endif */ /* #endfor */ /* #for J in PARTICLES_VDW_J */ vdwjidx{J}A = 2*vdwtype[jnrA+{J}]; @@ -632,63 +605,6 @@ void /* #define INNERFLOPS INNERFLOPS+3 */ /* #endif */ - /* #elif KERNEL_ELEC=='GeneralizedBorn' */ - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm256_mul_pd(isai{I},isaj{J}); - gbqqfactor = _mm256_xor_pd(signbit,_mm256_mul_pd(qq{I}{J},_mm256_mul_pd(isaprod,gbinvepsdiff))); - gbscale = _mm256_mul_pd(isaprod,gbtabscale); - /* #define INNERFLOPS INNERFLOPS+5 */ - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. 
- */ - rt = _mm256_mul_pd(r{I}{J},gbscale); - gbitab = _mm256_cvttpd_epi32(rt); - gbeps = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR)); - gbitab = _mm_slli_epi32(gbitab,2); - Y = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,0) ); - F = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,1) ); - G = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,2) ); - H = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,3) ); - GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H); - Heps = _mm256_mul_pd(gbeps,H); - Fp = _mm256_add_pd(F,_mm256_mul_pd(gbeps,_mm256_add_pd(G,Heps))); - VV = _mm256_add_pd(Y,_mm256_mul_pd(gbeps,Fp)); - vgb = _mm256_mul_pd(gbqqfactor,VV); - /* #define INNERFLOPS INNERFLOPS+10 */ - - /* #if 'Force' in KERNEL_VF */ - FF = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps)))); - fgb = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale)); - dvdatmp = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r{I}{J}))); - /* #if ROUND == 'Epilogue' */ - dvdatmp = _mm256_andnot_pd(dummy_mask,dvdatmp); - /* #endif */ - dvdasum = _mm256_add_pd(dvdasum,dvdatmp); - /* #if ROUND == 'Loop' */ - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - /* #else */ - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? 
dvda+jnrD : scratch; - /* #endif */ - gmx_mm256_increment_4real_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD, - _mm256_mul_pd(dvdatmp,_mm256_mul_pd(isaj{J},isaj{J}))); - /* #define INNERFLOPS INNERFLOPS+12 */ - /* #endif */ - velec = _mm256_mul_pd(qq{I}{J},rinv{I}{J}); - /* #define INNERFLOPS INNERFLOPS+1 */ - /* #if 'Force' in KERNEL_VF */ - felec = _mm256_mul_pd(_mm256_sub_pd(_mm256_mul_pd(velec,rinv{I}{J}),fgb),rinv{I}{J}); - /* #define INNERFLOPS INNERFLOPS+3 */ - /* #endif */ - /* #elif KERNEL_ELEC=='Ewald' */ /* EWALD ELECTROSTATICS */ @@ -935,17 +851,6 @@ void /* #endif */ velecsum = _mm256_add_pd(velecsum,velec); /* #define INNERFLOPS INNERFLOPS+1 */ - /* #if KERNEL_ELEC=='GeneralizedBorn' */ - /* #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */ - vgb = _mm256_and_pd(vgb,cutoff_mask); - /* #define INNERFLOPS INNERFLOPS+1 */ - /* #endif */ - /* #if ROUND == 'Epilogue' */ - vgb = _mm256_andnot_pd(dummy_mask,vgb); - /* #endif */ - vgbsum = _mm256_add_pd(vgbsum,vgb); - /* #define INNERFLOPS INNERFLOPS+1 */ - /* #endif */ /* #endif */ /* #if 'vdw' in INTERACTION_FLAGS[I][J] */ /* ## Note special check for TIP4P-TIP4P. 
Since we are cutting of all hydrogen interactions we also cut the LJ-only O-O interaction */ @@ -1098,19 +1003,11 @@ void gmx_mm256_update_1pot_pd(velecsum,kernel_data->energygrp_elec+ggid); /* #define OUTERFLOPS OUTERFLOPS+1 */ /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC */ - gmx_mm256_update_1pot_pd(vgbsum,kernel_data->energygrp_polarization+ggid); - /* #define OUTERFLOPS OUTERFLOPS+1 */ - /* #endif */ /* #if KERNEL_VDW != 'None' */ gmx_mm256_update_1pot_pd(vvdwsum,kernel_data->energygrp_vdw+ggid); /* #define OUTERFLOPS OUTERFLOPS+1 */ /* #endif */ /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */ - dvdasum = _mm256_mul_pd(dvdasum, _mm256_mul_pd(isai{I},isai{I})); - gmx_mm256_update_1pot_pd(dvdasum,dvda+inr); - /* #endif */ /* Increment number of inner iterations */ inneriter += j_index_end - j_index_start; diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/make_nb_kernel_avx_256_single.py b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/make_nb_kernel_avx_256_single.py index b44938008e..dbb439ce39 100755 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/make_nb_kernel_avx_256_single.py +++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/make_nb_kernel_avx_256_single.py @@ -2,7 +2,7 @@ # # This file is part of the GROMACS molecular simulation package. # -# Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by +# Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, # and including many others, as listed in the AUTHORS file in the # top-level source directory and at http://www.gromacs.org. 
@@ -105,7 +105,6 @@ ElectrostaticsList = { 'None' : [], 'Coulomb' : ['rinv','rinvsq'], 'ReactionField' : ['rinv','rinvsq'], - 'GeneralizedBorn' : ['rinv','r'], 'CubicSplineTable' : ['rinv','r','table'], 'Ewald' : ['rinv','rinvsq','r'], } @@ -190,7 +189,6 @@ Abbreviation = { 'Coulomb' : 'Coul', 'Ewald' : 'Ew', 'ReactionField' : 'RF', - 'GeneralizedBorn' : 'GB', 'CubicSplineTable' : 'CSTab', 'LennardJones' : 'LJ', 'Buckingham' : 'Bham', @@ -269,7 +267,7 @@ def KeepKernel(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,Kernel return 0 # No need for LJ-only water optimization, or water optimization with implicit solvent. - if('Water' in KernelGeom[0] and (KernelElec=='None' or 'GeneralizedBorn' in KernelElec)): + if('Water' in KernelGeom[0] and KernelElec=='None'): return 0 # Non-matching table settings are pointless diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_256_single.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_256_single.c deleted file mode 100644 index 7eb97eec87..0000000000 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_256_single.c +++ /dev/null @@ -1,1192 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. 
- * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/* - * Note: this file was generated by the GROMACS avx_256_single kernel generator. 
- */ -#include "gmxpre.h" - -#include "config.h" - -#include - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_x86_avx_256_single.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_256_single - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: CubicSplineTable - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_256_single - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B,C,D,E,F,G,H refer to j loop unrolling done with AVX, e.g. for the eight different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB,jnrC,jnrD; - int jnrE,jnrF,jnrG,jnrH; - int jnrlistA,jnrlistB,jnrlistC,jnrlistD; - int jnrlistE,jnrlistF,jnrlistG,jnrlistH; - int j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD; - int j_coord_offsetE,j_coord_offsetF,j_coord_offsetG,j_coord_offsetH; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - real *fjptrA,*fjptrB,*fjptrC,*fjptrD,*fjptrE,*fjptrF,*fjptrG,*fjptrH; - real scratch[4*DIM]; - __m256 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - real * vdwioffsetptr0; - __m256 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D,vdwjidx0E,vdwjidx0F,vdwjidx0G,vdwjidx0H; - __m256 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m256 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m256 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m256i gbitab; - __m128i gbitab_lo,gbitab_hi; - __m256 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp; - __m256 minushalf = _mm256_set1_ps(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - __m256 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - __m256 one_sixth = _mm256_set1_ps(1.0/6.0); - __m256 one_twelfth = _mm256_set1_ps(1.0/12.0); - __m256i vfitab; - __m128i vfitab_lo,vfitab_hi; - __m128i ifour = _mm_set1_epi32(4); - __m256 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m256 dummy_mask,cutoff_mask; - __m256 signbit = _mm256_castsi256_ps( _mm256_set1_epi32(0x80000000) ); - __m256 one = _mm256_set1_ps(1.0); - __m256 two = _mm256_set1_ps(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = 
_mm256_set1_ps(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = _mm256_set1_ps(kernel_data->table_vdw->scale); - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm256_set1_ps(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm256_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = jnrC = jnrD = jnrE = jnrF = jnrG = jnrH = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - j_coord_offsetC = 0; - j_coord_offsetD = 0; - j_coord_offsetE = 0; - j_coord_offsetF = 0; - j_coord_offsetG = 0; - j_coord_offsetH = 0; - - outeriter = 0; - inneriter = 0; - - for(iidx=0;iidx<4*DIM;iidx++) - { - scratch[iidx] = 0.0; - } - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx=0; jidx+=8) - { - - /* Get j neighbor index, and coordinate index */ - jnrA = jjnr[jidx]; - jnrB = jjnr[jidx+1]; - jnrC = jjnr[jidx+2]; - jnrD = jjnr[jidx+3]; - jnrE = jjnr[jidx+4]; - jnrF = jjnr[jidx+5]; - jnrG = jjnr[jidx+6]; - jnrH = jjnr[jidx+7]; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - j_coord_offsetE = DIM*jnrE; - j_coord_offsetF = DIM*jnrF; - j_coord_offsetG = DIM*jnrG; - j_coord_offsetH = DIM*jnrH; - - /* load j atom coordinates */ - gmx_mm256_load_1rvec_8ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - x+j_coord_offsetE,x+j_coord_offsetF, - x+j_coord_offsetG,x+j_coord_offsetH, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm256_sub_ps(ix0,jx0); - dy00 = _mm256_sub_ps(iy0,jy0); - dz00 = _mm256_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm256_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = avx256_invsqrt_f(rsq00); - - /* Load parameters for j particles */ - jq0 = 
gmx_mm256_load_8real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0, - charge+jnrE+0,charge+jnrF+0, - charge+jnrG+0,charge+jnrH+0); - isaj0 = gmx_mm256_load_8real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0, - invsqrta+jnrE+0,invsqrta+jnrF+0, - invsqrta+jnrG+0,invsqrta+jnrH+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - vdwjidx0E = 2*vdwtype[jnrE+0]; - vdwjidx0F = 2*vdwtype[jnrF+0]; - vdwjidx0G = 2*vdwtype[jnrG+0]; - vdwjidx0H = 2*vdwtype[jnrH+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm256_mul_ps(rsq00,rinv00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm256_mul_ps(iq0,jq0); - gmx_mm256_load_8pair_swizzle_ps(vdwioffsetptr0+vdwjidx0A, - vdwioffsetptr0+vdwjidx0B, - vdwioffsetptr0+vdwjidx0C, - vdwioffsetptr0+vdwjidx0D, - vdwioffsetptr0+vdwjidx0E, - vdwioffsetptr0+vdwjidx0F, - vdwioffsetptr0+vdwjidx0G, - vdwioffsetptr0+vdwjidx0H, - &c6_00,&c12_00); - - /* Calculate table index by multiplying r with table scale and truncate to integer */ - rt = _mm256_mul_ps(r00,vftabscale); - vfitab = _mm256_cvttps_epi32(rt); - vfeps = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR)); - /* AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... 
*/ - vfitab_lo = _mm256_extractf128_si256(vfitab,0x0); - vfitab_hi = _mm256_extractf128_si256(vfitab,0x1); - vfitab_lo = _mm_slli_epi32(vfitab_lo,3); - vfitab_hi = _mm_slli_epi32(vfitab_hi,3); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm256_mul_ps(isai0,isaj0); - gbqqfactor = _mm256_xor_ps(signbit,_mm256_mul_ps(qq00,_mm256_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm256_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm256_mul_ps(r00,gbscale); - gbitab = _mm256_cvttps_epi32(rt); - gbeps = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR)); - /* AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... */ - gbitab_lo = _mm256_extractf128_si256(gbitab,0x0); - gbitab_hi = _mm256_extractf128_si256(gbitab,0x1); - gbitab_lo = _mm_slli_epi32(gbitab_lo,2); - gbitab_hi = _mm_slli_epi32(gbitab_hi,2); - Y = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,0)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,0))); - F = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,1)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,1))); - G = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,2)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,2))); - H = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,3)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,3))); - GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H); - Heps = _mm256_mul_ps(gbeps,H); - Fp = _mm256_add_ps(F,_mm256_mul_ps(gbeps,_mm256_add_ps(G,Heps))); - VV = _mm256_add_ps(Y,_mm256_mul_ps(gbeps,Fp)); - vgb = _mm256_mul_ps(gbqqfactor,VV); - - FF = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps)))); - fgb = _mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale)); - dvdatmp = 
_mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r00))); - dvdasum = _mm256_add_ps(dvdasum,dvdatmp); - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - fjptrE = dvda+jnrE; - fjptrF = dvda+jnrF; - fjptrG = dvda+jnrG; - fjptrH = dvda+jnrH; - gmx_mm256_increment_8real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH, - _mm256_mul_ps(dvdatmp,_mm256_mul_ps(isaj0,isaj0))); - velec = _mm256_mul_ps(qq00,rinv00); - felec = _mm256_mul_ps(_mm256_sub_ps(_mm256_mul_ps(velec,rinv00),fgb),rinv00); - - /* CUBIC SPLINE TABLE DISPERSION */ - Y = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,0)), - _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,0))); - F = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,1)), - _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,1))); - G = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,2)), - _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,2))); - H = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,3)), - _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,3))); - GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H); - Heps = _mm256_mul_ps(vfeps,H); - Fp = _mm256_add_ps(F,_mm256_mul_ps(vfeps,_mm256_add_ps(G,Heps))); - VV = _mm256_add_ps(Y,_mm256_mul_ps(vfeps,Fp)); - vvdw6 = _mm256_mul_ps(c6_00,VV); - FF = _mm256_add_ps(Fp,_mm256_mul_ps(vfeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps)))); - fvdw6 = _mm256_mul_ps(c6_00,FF); - - /* CUBIC SPLINE TABLE REPULSION */ - vfitab_lo = _mm_add_epi32(vfitab_lo,ifour); - vfitab_hi = _mm_add_epi32(vfitab_hi,ifour); - Y = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,0)), - _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,0))); - F = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,1)), - _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,1))); - G = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,2)), - _mm_load_ps(vftab + 
_mm_extract_epi32(vfitab_lo,2))); - H = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,3)), - _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,3))); - GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H); - Heps = _mm256_mul_ps(vfeps,H); - Fp = _mm256_add_ps(F,_mm256_mul_ps(vfeps,_mm256_add_ps(G,Heps))); - VV = _mm256_add_ps(Y,_mm256_mul_ps(vfeps,Fp)); - vvdw12 = _mm256_mul_ps(c12_00,VV); - FF = _mm256_add_ps(Fp,_mm256_mul_ps(vfeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps)))); - fvdw12 = _mm256_mul_ps(c12_00,FF); - vvdw = _mm256_add_ps(vvdw12,vvdw6); - fvdw = _mm256_xor_ps(signbit,_mm256_mul_ps(_mm256_add_ps(fvdw6,fvdw12),_mm256_mul_ps(vftabscale,rinv00))); - - /* Update potential sum for this i atom from the interaction with this j atom. */ - velecsum = _mm256_add_ps(velecsum,velec); - vgbsum = _mm256_add_ps(vgbsum,vgb); - vvdwsum = _mm256_add_ps(vvdwsum,vvdw); - - fscal = _mm256_add_ps(felec,fvdw); - - /* Calculate temporary vectorial force */ - tx = _mm256_mul_ps(fscal,dx00); - ty = _mm256_mul_ps(fscal,dy00); - tz = _mm256_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm256_add_ps(fix0,tx); - fiy0 = _mm256_add_ps(fiy0,ty); - fiz0 = _mm256_add_ps(fiz0,tz); - - fjptrA = f+j_coord_offsetA; - fjptrB = f+j_coord_offsetB; - fjptrC = f+j_coord_offsetC; - fjptrD = f+j_coord_offsetD; - fjptrE = f+j_coord_offsetE; - fjptrF = f+j_coord_offsetF; - fjptrG = f+j_coord_offsetG; - fjptrH = f+j_coord_offsetH; - gmx_mm256_decrement_1rvec_8ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,tx,ty,tz); - - /* Inner loop uses 91 flops */ - } - - if(jidx=0) ? jnrlistA : 0; - jnrB = (jnrlistB>=0) ? jnrlistB : 0; - jnrC = (jnrlistC>=0) ? jnrlistC : 0; - jnrD = (jnrlistD>=0) ? jnrlistD : 0; - jnrE = (jnrlistE>=0) ? jnrlistE : 0; - jnrF = (jnrlistF>=0) ? jnrlistF : 0; - jnrG = (jnrlistG>=0) ? jnrlistG : 0; - jnrH = (jnrlistH>=0) ? 
jnrlistH : 0; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - j_coord_offsetE = DIM*jnrE; - j_coord_offsetF = DIM*jnrF; - j_coord_offsetG = DIM*jnrG; - j_coord_offsetH = DIM*jnrH; - - /* load j atom coordinates */ - gmx_mm256_load_1rvec_8ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - x+j_coord_offsetE,x+j_coord_offsetF, - x+j_coord_offsetG,x+j_coord_offsetH, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm256_sub_ps(ix0,jx0); - dy00 = _mm256_sub_ps(iy0,jy0); - dz00 = _mm256_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm256_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = avx256_invsqrt_f(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm256_load_8real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0, - charge+jnrE+0,charge+jnrF+0, - charge+jnrG+0,charge+jnrH+0); - isaj0 = gmx_mm256_load_8real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0, - invsqrta+jnrE+0,invsqrta+jnrF+0, - invsqrta+jnrG+0,invsqrta+jnrH+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - vdwjidx0E = 2*vdwtype[jnrE+0]; - vdwjidx0F = 2*vdwtype[jnrF+0]; - vdwjidx0G = 2*vdwtype[jnrG+0]; - vdwjidx0H = 2*vdwtype[jnrH+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm256_mul_ps(rsq00,rinv00); - r00 = _mm256_andnot_ps(dummy_mask,r00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm256_mul_ps(iq0,jq0); - gmx_mm256_load_8pair_swizzle_ps(vdwioffsetptr0+vdwjidx0A, - vdwioffsetptr0+vdwjidx0B, - vdwioffsetptr0+vdwjidx0C, - vdwioffsetptr0+vdwjidx0D, - vdwioffsetptr0+vdwjidx0E, - vdwioffsetptr0+vdwjidx0F, - vdwioffsetptr0+vdwjidx0G, - vdwioffsetptr0+vdwjidx0H, - &c6_00,&c12_00); - - /* 
Calculate table index by multiplying r with table scale and truncate to integer */ - rt = _mm256_mul_ps(r00,vftabscale); - vfitab = _mm256_cvttps_epi32(rt); - vfeps = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR)); - /* AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... */ - vfitab_lo = _mm256_extractf128_si256(vfitab,0x0); - vfitab_hi = _mm256_extractf128_si256(vfitab,0x1); - vfitab_lo = _mm_slli_epi32(vfitab_lo,3); - vfitab_hi = _mm_slli_epi32(vfitab_hi,3); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm256_mul_ps(isai0,isaj0); - gbqqfactor = _mm256_xor_ps(signbit,_mm256_mul_ps(qq00,_mm256_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm256_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm256_mul_ps(r00,gbscale); - gbitab = _mm256_cvttps_epi32(rt); - gbeps = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR)); - /* AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... 
*/ - gbitab_lo = _mm256_extractf128_si256(gbitab,0x0); - gbitab_hi = _mm256_extractf128_si256(gbitab,0x1); - gbitab_lo = _mm_slli_epi32(gbitab_lo,2); - gbitab_hi = _mm_slli_epi32(gbitab_hi,2); - Y = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,0)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,0))); - F = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,1)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,1))); - G = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,2)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,2))); - H = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,3)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,3))); - GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H); - Heps = _mm256_mul_ps(gbeps,H); - Fp = _mm256_add_ps(F,_mm256_mul_ps(gbeps,_mm256_add_ps(G,Heps))); - VV = _mm256_add_ps(Y,_mm256_mul_ps(gbeps,Fp)); - vgb = _mm256_mul_ps(gbqqfactor,VV); - - FF = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps)))); - fgb = _mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale)); - dvdatmp = _mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r00))); - dvdatmp = _mm256_andnot_ps(dummy_mask,dvdatmp); - dvdasum = _mm256_add_ps(dvdasum,dvdatmp); - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? dvda+jnrD : scratch; - fjptrE = (jnrlistE>=0) ? dvda+jnrE : scratch; - fjptrF = (jnrlistF>=0) ? dvda+jnrF : scratch; - fjptrG = (jnrlistG>=0) ? dvda+jnrG : scratch; - fjptrH = (jnrlistH>=0) ? 
dvda+jnrH : scratch; - gmx_mm256_increment_8real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH, - _mm256_mul_ps(dvdatmp,_mm256_mul_ps(isaj0,isaj0))); - velec = _mm256_mul_ps(qq00,rinv00); - felec = _mm256_mul_ps(_mm256_sub_ps(_mm256_mul_ps(velec,rinv00),fgb),rinv00); - - /* CUBIC SPLINE TABLE DISPERSION */ - Y = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,0)), - _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,0))); - F = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,1)), - _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,1))); - G = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,2)), - _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,2))); - H = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,3)), - _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,3))); - GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H); - Heps = _mm256_mul_ps(vfeps,H); - Fp = _mm256_add_ps(F,_mm256_mul_ps(vfeps,_mm256_add_ps(G,Heps))); - VV = _mm256_add_ps(Y,_mm256_mul_ps(vfeps,Fp)); - vvdw6 = _mm256_mul_ps(c6_00,VV); - FF = _mm256_add_ps(Fp,_mm256_mul_ps(vfeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps)))); - fvdw6 = _mm256_mul_ps(c6_00,FF); - - /* CUBIC SPLINE TABLE REPULSION */ - vfitab_lo = _mm_add_epi32(vfitab_lo,ifour); - vfitab_hi = _mm_add_epi32(vfitab_hi,ifour); - Y = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,0)), - _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,0))); - F = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,1)), - _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,1))); - G = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,2)), - _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,2))); - H = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,3)), - _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,3))); - GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H); - Heps = _mm256_mul_ps(vfeps,H); - Fp = 
_mm256_add_ps(F,_mm256_mul_ps(vfeps,_mm256_add_ps(G,Heps))); - VV = _mm256_add_ps(Y,_mm256_mul_ps(vfeps,Fp)); - vvdw12 = _mm256_mul_ps(c12_00,VV); - FF = _mm256_add_ps(Fp,_mm256_mul_ps(vfeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps)))); - fvdw12 = _mm256_mul_ps(c12_00,FF); - vvdw = _mm256_add_ps(vvdw12,vvdw6); - fvdw = _mm256_xor_ps(signbit,_mm256_mul_ps(_mm256_add_ps(fvdw6,fvdw12),_mm256_mul_ps(vftabscale,rinv00))); - - /* Update potential sum for this i atom from the interaction with this j atom. */ - velec = _mm256_andnot_ps(dummy_mask,velec); - velecsum = _mm256_add_ps(velecsum,velec); - vgb = _mm256_andnot_ps(dummy_mask,vgb); - vgbsum = _mm256_add_ps(vgbsum,vgb); - vvdw = _mm256_andnot_ps(dummy_mask,vvdw); - vvdwsum = _mm256_add_ps(vvdwsum,vvdw); - - fscal = _mm256_add_ps(felec,fvdw); - - fscal = _mm256_andnot_ps(dummy_mask,fscal); - - /* Calculate temporary vectorial force */ - tx = _mm256_mul_ps(fscal,dx00); - ty = _mm256_mul_ps(fscal,dy00); - tz = _mm256_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm256_add_ps(fix0,tx); - fiy0 = _mm256_add_ps(fiy0,ty); - fiz0 = _mm256_add_ps(fiz0,tz); - - fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch; - fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch; - fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch; - fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch; - fjptrE = (jnrlistE>=0) ? f+j_coord_offsetE : scratch; - fjptrF = (jnrlistF>=0) ? f+j_coord_offsetF : scratch; - fjptrG = (jnrlistG>=0) ? f+j_coord_offsetG : scratch; - fjptrH = (jnrlistH>=0) ? 
f+j_coord_offsetH : scratch; - gmx_mm256_decrement_1rvec_8ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,tx,ty,tz); - - /* Inner loop uses 92 flops */ - } - - /* End of innermost loop */ - - gmx_mm256_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0, - f+i_coord_offset,fshift+i_shift_offset); - - ggid = gid[iidx]; - /* Update potential energies */ - gmx_mm256_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid); - gmx_mm256_update_1pot_ps(vgbsum,kernel_data->energygrp_polarization+ggid); - gmx_mm256_update_1pot_ps(vvdwsum,kernel_data->energygrp_vdw+ggid); - dvdasum = _mm256_mul_ps(dvdasum, _mm256_mul_ps(isai0,isai0)); - gmx_mm256_update_1pot_ps(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 10 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*92); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_256_single - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: CubicSplineTable - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_256_single - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B,C,D,E,F,G,H refer to j loop unrolling done with AVX, e.g. for the eight different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB,jnrC,jnrD; - int jnrE,jnrF,jnrG,jnrH; - int jnrlistA,jnrlistB,jnrlistC,jnrlistD; - int jnrlistE,jnrlistF,jnrlistG,jnrlistH; - int j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD; - int j_coord_offsetE,j_coord_offsetF,j_coord_offsetG,j_coord_offsetH; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - real *fjptrA,*fjptrB,*fjptrC,*fjptrD,*fjptrE,*fjptrF,*fjptrG,*fjptrH; - real scratch[4*DIM]; - __m256 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - real * vdwioffsetptr0; - __m256 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D,vdwjidx0E,vdwjidx0F,vdwjidx0G,vdwjidx0H; - __m256 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m256 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m256 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m256i gbitab; - __m128i gbitab_lo,gbitab_hi; - __m256 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp; - __m256 minushalf = _mm256_set1_ps(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - __m256 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - __m256 one_sixth = _mm256_set1_ps(1.0/6.0); - __m256 one_twelfth = _mm256_set1_ps(1.0/12.0); - __m256i vfitab; - __m128i vfitab_lo,vfitab_hi; - __m128i ifour = _mm_set1_epi32(4); - __m256 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m256 dummy_mask,cutoff_mask; - __m256 signbit = _mm256_castsi256_ps( _mm256_set1_epi32(0x80000000) ); - __m256 one = _mm256_set1_ps(1.0); - __m256 two = _mm256_set1_ps(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = 
_mm256_set1_ps(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = _mm256_set1_ps(kernel_data->table_vdw->scale); - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm256_set1_ps(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm256_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = jnrC = jnrD = jnrE = jnrF = jnrG = jnrH = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - j_coord_offsetC = 0; - j_coord_offsetD = 0; - j_coord_offsetE = 0; - j_coord_offsetF = 0; - j_coord_offsetG = 0; - j_coord_offsetH = 0; - - outeriter = 0; - inneriter = 0; - - for(iidx=0;iidx<4*DIM;iidx++) - { - scratch[iidx] = 0.0; - } - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx=0; jidx+=8) - { - - /* Get j neighbor index, and coordinate index */ - jnrA = jjnr[jidx]; - jnrB = jjnr[jidx+1]; - jnrC = jjnr[jidx+2]; - jnrD = jjnr[jidx+3]; - jnrE = jjnr[jidx+4]; - jnrF = jjnr[jidx+5]; - jnrG = jjnr[jidx+6]; - jnrH = jjnr[jidx+7]; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - j_coord_offsetE = DIM*jnrE; - j_coord_offsetF = DIM*jnrF; - j_coord_offsetG = DIM*jnrG; - j_coord_offsetH = DIM*jnrH; - - /* load j atom coordinates */ - gmx_mm256_load_1rvec_8ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - x+j_coord_offsetE,x+j_coord_offsetF, - x+j_coord_offsetG,x+j_coord_offsetH, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm256_sub_ps(ix0,jx0); - dy00 = _mm256_sub_ps(iy0,jy0); - dz00 = _mm256_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm256_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = avx256_invsqrt_f(rsq00); - - /* Load parameters for j particles */ - jq0 = 
gmx_mm256_load_8real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0, - charge+jnrE+0,charge+jnrF+0, - charge+jnrG+0,charge+jnrH+0); - isaj0 = gmx_mm256_load_8real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0, - invsqrta+jnrE+0,invsqrta+jnrF+0, - invsqrta+jnrG+0,invsqrta+jnrH+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - vdwjidx0E = 2*vdwtype[jnrE+0]; - vdwjidx0F = 2*vdwtype[jnrF+0]; - vdwjidx0G = 2*vdwtype[jnrG+0]; - vdwjidx0H = 2*vdwtype[jnrH+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm256_mul_ps(rsq00,rinv00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm256_mul_ps(iq0,jq0); - gmx_mm256_load_8pair_swizzle_ps(vdwioffsetptr0+vdwjidx0A, - vdwioffsetptr0+vdwjidx0B, - vdwioffsetptr0+vdwjidx0C, - vdwioffsetptr0+vdwjidx0D, - vdwioffsetptr0+vdwjidx0E, - vdwioffsetptr0+vdwjidx0F, - vdwioffsetptr0+vdwjidx0G, - vdwioffsetptr0+vdwjidx0H, - &c6_00,&c12_00); - - /* Calculate table index by multiplying r with table scale and truncate to integer */ - rt = _mm256_mul_ps(r00,vftabscale); - vfitab = _mm256_cvttps_epi32(rt); - vfeps = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR)); - /* AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... 
*/ - vfitab_lo = _mm256_extractf128_si256(vfitab,0x0); - vfitab_hi = _mm256_extractf128_si256(vfitab,0x1); - vfitab_lo = _mm_slli_epi32(vfitab_lo,3); - vfitab_hi = _mm_slli_epi32(vfitab_hi,3); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm256_mul_ps(isai0,isaj0); - gbqqfactor = _mm256_xor_ps(signbit,_mm256_mul_ps(qq00,_mm256_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm256_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm256_mul_ps(r00,gbscale); - gbitab = _mm256_cvttps_epi32(rt); - gbeps = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR)); - /* AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... */ - gbitab_lo = _mm256_extractf128_si256(gbitab,0x0); - gbitab_hi = _mm256_extractf128_si256(gbitab,0x1); - gbitab_lo = _mm_slli_epi32(gbitab_lo,2); - gbitab_hi = _mm_slli_epi32(gbitab_hi,2); - Y = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,0)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,0))); - F = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,1)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,1))); - G = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,2)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,2))); - H = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,3)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,3))); - GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H); - Heps = _mm256_mul_ps(gbeps,H); - Fp = _mm256_add_ps(F,_mm256_mul_ps(gbeps,_mm256_add_ps(G,Heps))); - VV = _mm256_add_ps(Y,_mm256_mul_ps(gbeps,Fp)); - vgb = _mm256_mul_ps(gbqqfactor,VV); - - FF = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps)))); - fgb = _mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale)); - dvdatmp = 
_mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r00))); - dvdasum = _mm256_add_ps(dvdasum,dvdatmp); - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - fjptrE = dvda+jnrE; - fjptrF = dvda+jnrF; - fjptrG = dvda+jnrG; - fjptrH = dvda+jnrH; - gmx_mm256_increment_8real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH, - _mm256_mul_ps(dvdatmp,_mm256_mul_ps(isaj0,isaj0))); - velec = _mm256_mul_ps(qq00,rinv00); - felec = _mm256_mul_ps(_mm256_sub_ps(_mm256_mul_ps(velec,rinv00),fgb),rinv00); - - /* CUBIC SPLINE TABLE DISPERSION */ - Y = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,0)), - _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,0))); - F = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,1)), - _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,1))); - G = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,2)), - _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,2))); - H = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,3)), - _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,3))); - GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H); - Heps = _mm256_mul_ps(vfeps,H); - Fp = _mm256_add_ps(F,_mm256_mul_ps(vfeps,_mm256_add_ps(G,Heps))); - FF = _mm256_add_ps(Fp,_mm256_mul_ps(vfeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps)))); - fvdw6 = _mm256_mul_ps(c6_00,FF); - - /* CUBIC SPLINE TABLE REPULSION */ - vfitab_lo = _mm_add_epi32(vfitab_lo,ifour); - vfitab_hi = _mm_add_epi32(vfitab_hi,ifour); - Y = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,0)), - _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,0))); - F = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,1)), - _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,1))); - G = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,2)), - _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,2))); - H = gmx_mm256_set_m128(_mm_load_ps(vftab + 
_mm_extract_epi32(vfitab_hi,3)), - _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,3))); - GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H); - Heps = _mm256_mul_ps(vfeps,H); - Fp = _mm256_add_ps(F,_mm256_mul_ps(vfeps,_mm256_add_ps(G,Heps))); - FF = _mm256_add_ps(Fp,_mm256_mul_ps(vfeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps)))); - fvdw12 = _mm256_mul_ps(c12_00,FF); - fvdw = _mm256_xor_ps(signbit,_mm256_mul_ps(_mm256_add_ps(fvdw6,fvdw12),_mm256_mul_ps(vftabscale,rinv00))); - - fscal = _mm256_add_ps(felec,fvdw); - - /* Calculate temporary vectorial force */ - tx = _mm256_mul_ps(fscal,dx00); - ty = _mm256_mul_ps(fscal,dy00); - tz = _mm256_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm256_add_ps(fix0,tx); - fiy0 = _mm256_add_ps(fiy0,ty); - fiz0 = _mm256_add_ps(fiz0,tz); - - fjptrA = f+j_coord_offsetA; - fjptrB = f+j_coord_offsetB; - fjptrC = f+j_coord_offsetC; - fjptrD = f+j_coord_offsetD; - fjptrE = f+j_coord_offsetE; - fjptrF = f+j_coord_offsetF; - fjptrG = f+j_coord_offsetG; - fjptrH = f+j_coord_offsetH; - gmx_mm256_decrement_1rvec_8ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,tx,ty,tz); - - /* Inner loop uses 81 flops */ - } - - if(jidx=0) ? jnrlistA : 0; - jnrB = (jnrlistB>=0) ? jnrlistB : 0; - jnrC = (jnrlistC>=0) ? jnrlistC : 0; - jnrD = (jnrlistD>=0) ? jnrlistD : 0; - jnrE = (jnrlistE>=0) ? jnrlistE : 0; - jnrF = (jnrlistF>=0) ? jnrlistF : 0; - jnrG = (jnrlistG>=0) ? jnrlistG : 0; - jnrH = (jnrlistH>=0) ? 
jnrlistH : 0; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - j_coord_offsetE = DIM*jnrE; - j_coord_offsetF = DIM*jnrF; - j_coord_offsetG = DIM*jnrG; - j_coord_offsetH = DIM*jnrH; - - /* load j atom coordinates */ - gmx_mm256_load_1rvec_8ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - x+j_coord_offsetE,x+j_coord_offsetF, - x+j_coord_offsetG,x+j_coord_offsetH, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm256_sub_ps(ix0,jx0); - dy00 = _mm256_sub_ps(iy0,jy0); - dz00 = _mm256_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm256_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = avx256_invsqrt_f(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm256_load_8real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0, - charge+jnrE+0,charge+jnrF+0, - charge+jnrG+0,charge+jnrH+0); - isaj0 = gmx_mm256_load_8real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0, - invsqrta+jnrE+0,invsqrta+jnrF+0, - invsqrta+jnrG+0,invsqrta+jnrH+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - vdwjidx0E = 2*vdwtype[jnrE+0]; - vdwjidx0F = 2*vdwtype[jnrF+0]; - vdwjidx0G = 2*vdwtype[jnrG+0]; - vdwjidx0H = 2*vdwtype[jnrH+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm256_mul_ps(rsq00,rinv00); - r00 = _mm256_andnot_ps(dummy_mask,r00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm256_mul_ps(iq0,jq0); - gmx_mm256_load_8pair_swizzle_ps(vdwioffsetptr0+vdwjidx0A, - vdwioffsetptr0+vdwjidx0B, - vdwioffsetptr0+vdwjidx0C, - vdwioffsetptr0+vdwjidx0D, - vdwioffsetptr0+vdwjidx0E, - vdwioffsetptr0+vdwjidx0F, - vdwioffsetptr0+vdwjidx0G, - vdwioffsetptr0+vdwjidx0H, - &c6_00,&c12_00); - - /* 
Calculate table index by multiplying r with table scale and truncate to integer */ - rt = _mm256_mul_ps(r00,vftabscale); - vfitab = _mm256_cvttps_epi32(rt); - vfeps = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR)); - /* AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... */ - vfitab_lo = _mm256_extractf128_si256(vfitab,0x0); - vfitab_hi = _mm256_extractf128_si256(vfitab,0x1); - vfitab_lo = _mm_slli_epi32(vfitab_lo,3); - vfitab_hi = _mm_slli_epi32(vfitab_hi,3); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm256_mul_ps(isai0,isaj0); - gbqqfactor = _mm256_xor_ps(signbit,_mm256_mul_ps(qq00,_mm256_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm256_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm256_mul_ps(r00,gbscale); - gbitab = _mm256_cvttps_epi32(rt); - gbeps = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR)); - /* AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... 
*/ - gbitab_lo = _mm256_extractf128_si256(gbitab,0x0); - gbitab_hi = _mm256_extractf128_si256(gbitab,0x1); - gbitab_lo = _mm_slli_epi32(gbitab_lo,2); - gbitab_hi = _mm_slli_epi32(gbitab_hi,2); - Y = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,0)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,0))); - F = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,1)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,1))); - G = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,2)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,2))); - H = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,3)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,3))); - GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H); - Heps = _mm256_mul_ps(gbeps,H); - Fp = _mm256_add_ps(F,_mm256_mul_ps(gbeps,_mm256_add_ps(G,Heps))); - VV = _mm256_add_ps(Y,_mm256_mul_ps(gbeps,Fp)); - vgb = _mm256_mul_ps(gbqqfactor,VV); - - FF = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps)))); - fgb = _mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale)); - dvdatmp = _mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r00))); - dvdatmp = _mm256_andnot_ps(dummy_mask,dvdatmp); - dvdasum = _mm256_add_ps(dvdasum,dvdatmp); - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? dvda+jnrD : scratch; - fjptrE = (jnrlistE>=0) ? dvda+jnrE : scratch; - fjptrF = (jnrlistF>=0) ? dvda+jnrF : scratch; - fjptrG = (jnrlistG>=0) ? dvda+jnrG : scratch; - fjptrH = (jnrlistH>=0) ? 
dvda+jnrH : scratch; - gmx_mm256_increment_8real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH, - _mm256_mul_ps(dvdatmp,_mm256_mul_ps(isaj0,isaj0))); - velec = _mm256_mul_ps(qq00,rinv00); - felec = _mm256_mul_ps(_mm256_sub_ps(_mm256_mul_ps(velec,rinv00),fgb),rinv00); - - /* CUBIC SPLINE TABLE DISPERSION */ - Y = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,0)), - _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,0))); - F = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,1)), - _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,1))); - G = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,2)), - _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,2))); - H = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,3)), - _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,3))); - GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H); - Heps = _mm256_mul_ps(vfeps,H); - Fp = _mm256_add_ps(F,_mm256_mul_ps(vfeps,_mm256_add_ps(G,Heps))); - FF = _mm256_add_ps(Fp,_mm256_mul_ps(vfeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps)))); - fvdw6 = _mm256_mul_ps(c6_00,FF); - - /* CUBIC SPLINE TABLE REPULSION */ - vfitab_lo = _mm_add_epi32(vfitab_lo,ifour); - vfitab_hi = _mm_add_epi32(vfitab_hi,ifour); - Y = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,0)), - _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,0))); - F = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,1)), - _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,1))); - G = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,2)), - _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,2))); - H = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,3)), - _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,3))); - GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H); - Heps = _mm256_mul_ps(vfeps,H); - Fp = _mm256_add_ps(F,_mm256_mul_ps(vfeps,_mm256_add_ps(G,Heps))); - FF = 
_mm256_add_ps(Fp,_mm256_mul_ps(vfeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps)))); - fvdw12 = _mm256_mul_ps(c12_00,FF); - fvdw = _mm256_xor_ps(signbit,_mm256_mul_ps(_mm256_add_ps(fvdw6,fvdw12),_mm256_mul_ps(vftabscale,rinv00))); - - fscal = _mm256_add_ps(felec,fvdw); - - fscal = _mm256_andnot_ps(dummy_mask,fscal); - - /* Calculate temporary vectorial force */ - tx = _mm256_mul_ps(fscal,dx00); - ty = _mm256_mul_ps(fscal,dy00); - tz = _mm256_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm256_add_ps(fix0,tx); - fiy0 = _mm256_add_ps(fiy0,ty); - fiz0 = _mm256_add_ps(fiz0,tz); - - fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch; - fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch; - fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch; - fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch; - fjptrE = (jnrlistE>=0) ? f+j_coord_offsetE : scratch; - fjptrF = (jnrlistF>=0) ? f+j_coord_offsetF : scratch; - fjptrG = (jnrlistG>=0) ? f+j_coord_offsetG : scratch; - fjptrH = (jnrlistH>=0) ? 
f+j_coord_offsetH : scratch; - gmx_mm256_decrement_1rvec_8ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,tx,ty,tz); - - /* Inner loop uses 82 flops */ - } - - /* End of innermost loop */ - - gmx_mm256_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0, - f+i_coord_offset,fshift+i_shift_offset); - - dvdasum = _mm256_mul_ps(dvdasum, _mm256_mul_ps(isai0,isai0)); - gmx_mm256_update_1pot_ps(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 7 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*82); -} diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_256_single.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_256_single.c deleted file mode 100644 index 6dfbecb6a4..0000000000 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_256_single.c +++ /dev/null @@ -1,1038 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/* - * Note: this file was generated by the GROMACS avx_256_single kernel generator. - */ -#include "gmxpre.h" - -#include "config.h" - -#include - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_x86_avx_256_single.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_256_single - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_256_single - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B,C,D,E,F,G,H refer to j loop unrolling done with AVX, e.g. 
for the eight different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB,jnrC,jnrD; - int jnrE,jnrF,jnrG,jnrH; - int jnrlistA,jnrlistB,jnrlistC,jnrlistD; - int jnrlistE,jnrlistF,jnrlistG,jnrlistH; - int j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD; - int j_coord_offsetE,j_coord_offsetF,j_coord_offsetG,j_coord_offsetH; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - real *fjptrA,*fjptrB,*fjptrC,*fjptrD,*fjptrE,*fjptrF,*fjptrG,*fjptrH; - real scratch[4*DIM]; - __m256 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - real * vdwioffsetptr0; - __m256 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D,vdwjidx0E,vdwjidx0F,vdwjidx0G,vdwjidx0H; - __m256 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m256 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m256 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m256i gbitab; - __m128i gbitab_lo,gbitab_hi; - __m256 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp; - __m256 minushalf = _mm256_set1_ps(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - __m256 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - __m256 one_sixth = _mm256_set1_ps(1.0/6.0); - __m256 one_twelfth = _mm256_set1_ps(1.0/12.0); - __m256i vfitab; - __m128i vfitab_lo,vfitab_hi; - __m128i ifour = _mm_set1_epi32(4); - __m256 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m256 dummy_mask,cutoff_mask; - __m256 signbit = _mm256_castsi256_ps( _mm256_set1_epi32(0x80000000) ); - __m256 one = _mm256_set1_ps(1.0); - __m256 two = _mm256_set1_ps(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - 
shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm256_set1_ps(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm256_set1_ps(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm256_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = jnrC = jnrD = jnrE = jnrF = jnrG = jnrH = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - j_coord_offsetC = 0; - j_coord_offsetD = 0; - j_coord_offsetE = 0; - j_coord_offsetF = 0; - j_coord_offsetG = 0; - j_coord_offsetH = 0; - - outeriter = 0; - inneriter = 0; - - for(iidx=0;iidx<4*DIM;iidx++) - { - scratch[iidx] = 0.0; - } - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx=0; jidx+=8) - { - - /* Get j neighbor index, and coordinate index */ - jnrA = jjnr[jidx]; - jnrB = jjnr[jidx+1]; - jnrC = jjnr[jidx+2]; - jnrD = jjnr[jidx+3]; - jnrE = jjnr[jidx+4]; - jnrF = jjnr[jidx+5]; - jnrG = jjnr[jidx+6]; - jnrH = jjnr[jidx+7]; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - j_coord_offsetE = DIM*jnrE; - j_coord_offsetF = DIM*jnrF; - j_coord_offsetG = DIM*jnrG; - j_coord_offsetH = DIM*jnrH; - - /* load j atom coordinates */ - gmx_mm256_load_1rvec_8ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - x+j_coord_offsetE,x+j_coord_offsetF, - x+j_coord_offsetG,x+j_coord_offsetH, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm256_sub_ps(ix0,jx0); - dy00 = _mm256_sub_ps(iy0,jy0); - dz00 = _mm256_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm256_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = avx256_invsqrt_f(rsq00); - - rinvsq00 = _mm256_mul_ps(rinv00,rinv00); - - /* Load 
parameters for j particles */ - jq0 = gmx_mm256_load_8real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0, - charge+jnrE+0,charge+jnrF+0, - charge+jnrG+0,charge+jnrH+0); - isaj0 = gmx_mm256_load_8real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0, - invsqrta+jnrE+0,invsqrta+jnrF+0, - invsqrta+jnrG+0,invsqrta+jnrH+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - vdwjidx0E = 2*vdwtype[jnrE+0]; - vdwjidx0F = 2*vdwtype[jnrF+0]; - vdwjidx0G = 2*vdwtype[jnrG+0]; - vdwjidx0H = 2*vdwtype[jnrH+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm256_mul_ps(rsq00,rinv00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm256_mul_ps(iq0,jq0); - gmx_mm256_load_8pair_swizzle_ps(vdwioffsetptr0+vdwjidx0A, - vdwioffsetptr0+vdwjidx0B, - vdwioffsetptr0+vdwjidx0C, - vdwioffsetptr0+vdwjidx0D, - vdwioffsetptr0+vdwjidx0E, - vdwioffsetptr0+vdwjidx0F, - vdwioffsetptr0+vdwjidx0G, - vdwioffsetptr0+vdwjidx0H, - &c6_00,&c12_00); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm256_mul_ps(isai0,isaj0); - gbqqfactor = _mm256_xor_ps(signbit,_mm256_mul_ps(qq00,_mm256_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm256_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm256_mul_ps(r00,gbscale); - gbitab = _mm256_cvttps_epi32(rt); - gbeps = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR)); - /* AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... 
*/ - gbitab_lo = _mm256_extractf128_si256(gbitab,0x0); - gbitab_hi = _mm256_extractf128_si256(gbitab,0x1); - gbitab_lo = _mm_slli_epi32(gbitab_lo,2); - gbitab_hi = _mm_slli_epi32(gbitab_hi,2); - Y = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,0)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,0))); - F = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,1)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,1))); - G = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,2)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,2))); - H = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,3)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,3))); - GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H); - Heps = _mm256_mul_ps(gbeps,H); - Fp = _mm256_add_ps(F,_mm256_mul_ps(gbeps,_mm256_add_ps(G,Heps))); - VV = _mm256_add_ps(Y,_mm256_mul_ps(gbeps,Fp)); - vgb = _mm256_mul_ps(gbqqfactor,VV); - - FF = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps)))); - fgb = _mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale)); - dvdatmp = _mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r00))); - dvdasum = _mm256_add_ps(dvdasum,dvdatmp); - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - fjptrE = dvda+jnrE; - fjptrF = dvda+jnrF; - fjptrG = dvda+jnrG; - fjptrH = dvda+jnrH; - gmx_mm256_increment_8real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH, - _mm256_mul_ps(dvdatmp,_mm256_mul_ps(isaj0,isaj0))); - velec = _mm256_mul_ps(qq00,rinv00); - felec = _mm256_mul_ps(_mm256_sub_ps(_mm256_mul_ps(velec,rinv00),fgb),rinv00); - - /* LENNARD-JONES DISPERSION/REPULSION */ - - rinvsix = _mm256_mul_ps(_mm256_mul_ps(rinvsq00,rinvsq00),rinvsq00); - vvdw6 = _mm256_mul_ps(c6_00,rinvsix); - vvdw12 = _mm256_mul_ps(c12_00,_mm256_mul_ps(rinvsix,rinvsix)); - vvdw = _mm256_sub_ps( _mm256_mul_ps(vvdw12,one_twelfth) , _mm256_mul_ps(vvdw6,one_sixth) ); - fvdw 
= _mm256_mul_ps(_mm256_sub_ps(vvdw12,vvdw6),rinvsq00); - - /* Update potential sum for this i atom from the interaction with this j atom. */ - velecsum = _mm256_add_ps(velecsum,velec); - vgbsum = _mm256_add_ps(vgbsum,vgb); - vvdwsum = _mm256_add_ps(vvdwsum,vvdw); - - fscal = _mm256_add_ps(felec,fvdw); - - /* Calculate temporary vectorial force */ - tx = _mm256_mul_ps(fscal,dx00); - ty = _mm256_mul_ps(fscal,dy00); - tz = _mm256_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm256_add_ps(fix0,tx); - fiy0 = _mm256_add_ps(fiy0,ty); - fiz0 = _mm256_add_ps(fiz0,tz); - - fjptrA = f+j_coord_offsetA; - fjptrB = f+j_coord_offsetB; - fjptrC = f+j_coord_offsetC; - fjptrD = f+j_coord_offsetD; - fjptrE = f+j_coord_offsetE; - fjptrF = f+j_coord_offsetF; - fjptrG = f+j_coord_offsetG; - fjptrH = f+j_coord_offsetH; - gmx_mm256_decrement_1rvec_8ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,tx,ty,tz); - - /* Inner loop uses 70 flops */ - } - - if(jidx=0) ? jnrlistA : 0; - jnrB = (jnrlistB>=0) ? jnrlistB : 0; - jnrC = (jnrlistC>=0) ? jnrlistC : 0; - jnrD = (jnrlistD>=0) ? jnrlistD : 0; - jnrE = (jnrlistE>=0) ? jnrlistE : 0; - jnrF = (jnrlistF>=0) ? jnrlistF : 0; - jnrG = (jnrlistG>=0) ? jnrlistG : 0; - jnrH = (jnrlistH>=0) ? 
jnrlistH : 0; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - j_coord_offsetE = DIM*jnrE; - j_coord_offsetF = DIM*jnrF; - j_coord_offsetG = DIM*jnrG; - j_coord_offsetH = DIM*jnrH; - - /* load j atom coordinates */ - gmx_mm256_load_1rvec_8ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - x+j_coord_offsetE,x+j_coord_offsetF, - x+j_coord_offsetG,x+j_coord_offsetH, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm256_sub_ps(ix0,jx0); - dy00 = _mm256_sub_ps(iy0,jy0); - dz00 = _mm256_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm256_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = avx256_invsqrt_f(rsq00); - - rinvsq00 = _mm256_mul_ps(rinv00,rinv00); - - /* Load parameters for j particles */ - jq0 = gmx_mm256_load_8real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0, - charge+jnrE+0,charge+jnrF+0, - charge+jnrG+0,charge+jnrH+0); - isaj0 = gmx_mm256_load_8real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0, - invsqrta+jnrE+0,invsqrta+jnrF+0, - invsqrta+jnrG+0,invsqrta+jnrH+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - vdwjidx0E = 2*vdwtype[jnrE+0]; - vdwjidx0F = 2*vdwtype[jnrF+0]; - vdwjidx0G = 2*vdwtype[jnrG+0]; - vdwjidx0H = 2*vdwtype[jnrH+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm256_mul_ps(rsq00,rinv00); - r00 = _mm256_andnot_ps(dummy_mask,r00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm256_mul_ps(iq0,jq0); - gmx_mm256_load_8pair_swizzle_ps(vdwioffsetptr0+vdwjidx0A, - vdwioffsetptr0+vdwjidx0B, - vdwioffsetptr0+vdwjidx0C, - vdwioffsetptr0+vdwjidx0D, - vdwioffsetptr0+vdwjidx0E, - vdwioffsetptr0+vdwjidx0F, - vdwioffsetptr0+vdwjidx0G, - 
vdwioffsetptr0+vdwjidx0H, - &c6_00,&c12_00); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm256_mul_ps(isai0,isaj0); - gbqqfactor = _mm256_xor_ps(signbit,_mm256_mul_ps(qq00,_mm256_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm256_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm256_mul_ps(r00,gbscale); - gbitab = _mm256_cvttps_epi32(rt); - gbeps = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR)); - /* AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... */ - gbitab_lo = _mm256_extractf128_si256(gbitab,0x0); - gbitab_hi = _mm256_extractf128_si256(gbitab,0x1); - gbitab_lo = _mm_slli_epi32(gbitab_lo,2); - gbitab_hi = _mm_slli_epi32(gbitab_hi,2); - Y = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,0)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,0))); - F = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,1)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,1))); - G = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,2)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,2))); - H = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,3)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,3))); - GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H); - Heps = _mm256_mul_ps(gbeps,H); - Fp = _mm256_add_ps(F,_mm256_mul_ps(gbeps,_mm256_add_ps(G,Heps))); - VV = _mm256_add_ps(Y,_mm256_mul_ps(gbeps,Fp)); - vgb = _mm256_mul_ps(gbqqfactor,VV); - - FF = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps)))); - fgb = _mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale)); - dvdatmp = _mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r00))); - dvdatmp = _mm256_andnot_ps(dummy_mask,dvdatmp); - dvdasum = _mm256_add_ps(dvdasum,dvdatmp); - /* The pointers to 
scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? dvda+jnrD : scratch; - fjptrE = (jnrlistE>=0) ? dvda+jnrE : scratch; - fjptrF = (jnrlistF>=0) ? dvda+jnrF : scratch; - fjptrG = (jnrlistG>=0) ? dvda+jnrG : scratch; - fjptrH = (jnrlistH>=0) ? dvda+jnrH : scratch; - gmx_mm256_increment_8real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH, - _mm256_mul_ps(dvdatmp,_mm256_mul_ps(isaj0,isaj0))); - velec = _mm256_mul_ps(qq00,rinv00); - felec = _mm256_mul_ps(_mm256_sub_ps(_mm256_mul_ps(velec,rinv00),fgb),rinv00); - - /* LENNARD-JONES DISPERSION/REPULSION */ - - rinvsix = _mm256_mul_ps(_mm256_mul_ps(rinvsq00,rinvsq00),rinvsq00); - vvdw6 = _mm256_mul_ps(c6_00,rinvsix); - vvdw12 = _mm256_mul_ps(c12_00,_mm256_mul_ps(rinvsix,rinvsix)); - vvdw = _mm256_sub_ps( _mm256_mul_ps(vvdw12,one_twelfth) , _mm256_mul_ps(vvdw6,one_sixth) ); - fvdw = _mm256_mul_ps(_mm256_sub_ps(vvdw12,vvdw6),rinvsq00); - - /* Update potential sum for this i atom from the interaction with this j atom. */ - velec = _mm256_andnot_ps(dummy_mask,velec); - velecsum = _mm256_add_ps(velecsum,velec); - vgb = _mm256_andnot_ps(dummy_mask,vgb); - vgbsum = _mm256_add_ps(vgbsum,vgb); - vvdw = _mm256_andnot_ps(dummy_mask,vvdw); - vvdwsum = _mm256_add_ps(vvdwsum,vvdw); - - fscal = _mm256_add_ps(felec,fvdw); - - fscal = _mm256_andnot_ps(dummy_mask,fscal); - - /* Calculate temporary vectorial force */ - tx = _mm256_mul_ps(fscal,dx00); - ty = _mm256_mul_ps(fscal,dy00); - tz = _mm256_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm256_add_ps(fix0,tx); - fiy0 = _mm256_add_ps(fiy0,ty); - fiz0 = _mm256_add_ps(fiz0,tz); - - fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch; - fjptrB = (jnrlistB>=0) ? 
f+j_coord_offsetB : scratch; - fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch; - fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch; - fjptrE = (jnrlistE>=0) ? f+j_coord_offsetE : scratch; - fjptrF = (jnrlistF>=0) ? f+j_coord_offsetF : scratch; - fjptrG = (jnrlistG>=0) ? f+j_coord_offsetG : scratch; - fjptrH = (jnrlistH>=0) ? f+j_coord_offsetH : scratch; - gmx_mm256_decrement_1rvec_8ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,tx,ty,tz); - - /* Inner loop uses 71 flops */ - } - - /* End of innermost loop */ - - gmx_mm256_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0, - f+i_coord_offset,fshift+i_shift_offset); - - ggid = gid[iidx]; - /* Update potential energies */ - gmx_mm256_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid); - gmx_mm256_update_1pot_ps(vgbsum,kernel_data->energygrp_polarization+ggid); - gmx_mm256_update_1pot_ps(vvdwsum,kernel_data->energygrp_vdw+ggid); - dvdasum = _mm256_mul_ps(dvdasum, _mm256_mul_ps(isai0,isai0)); - gmx_mm256_update_1pot_ps(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 10 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*71); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_256_single - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_256_single - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. 
- * Suffixes A,B,C,D,E,F,G,H refer to j loop unrolling done with AVX, e.g. for the eight different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB,jnrC,jnrD; - int jnrE,jnrF,jnrG,jnrH; - int jnrlistA,jnrlistB,jnrlistC,jnrlistD; - int jnrlistE,jnrlistF,jnrlistG,jnrlistH; - int j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD; - int j_coord_offsetE,j_coord_offsetF,j_coord_offsetG,j_coord_offsetH; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - real *fjptrA,*fjptrB,*fjptrC,*fjptrD,*fjptrE,*fjptrF,*fjptrG,*fjptrH; - real scratch[4*DIM]; - __m256 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - real * vdwioffsetptr0; - __m256 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D,vdwjidx0E,vdwjidx0F,vdwjidx0G,vdwjidx0H; - __m256 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m256 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m256 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m256i gbitab; - __m128i gbitab_lo,gbitab_hi; - __m256 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp; - __m256 minushalf = _mm256_set1_ps(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - __m256 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - __m256 one_sixth = _mm256_set1_ps(1.0/6.0); - __m256 one_twelfth = _mm256_set1_ps(1.0/12.0); - __m256i vfitab; - __m128i vfitab_lo,vfitab_hi; - __m128i ifour = _mm_set1_epi32(4); - __m256 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m256 dummy_mask,cutoff_mask; - __m256 signbit = _mm256_castsi256_ps( _mm256_set1_epi32(0x80000000) ); - __m256 one = _mm256_set1_ps(1.0); - __m256 two = _mm256_set1_ps(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - 
iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm256_set1_ps(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm256_set1_ps(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm256_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = jnrC = jnrD = jnrE = jnrF = jnrG = jnrH = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - j_coord_offsetC = 0; - j_coord_offsetD = 0; - j_coord_offsetE = 0; - j_coord_offsetF = 0; - j_coord_offsetG = 0; - j_coord_offsetH = 0; - - outeriter = 0; - inneriter = 0; - - for(iidx=0;iidx<4*DIM;iidx++) - { - scratch[iidx] = 0.0; - } - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx=0; jidx+=8) - { - - /* Get j neighbor index, and coordinate index */ - jnrA = jjnr[jidx]; - jnrB = jjnr[jidx+1]; - jnrC = jjnr[jidx+2]; - jnrD = jjnr[jidx+3]; - jnrE = jjnr[jidx+4]; - jnrF = jjnr[jidx+5]; - jnrG = jjnr[jidx+6]; - jnrH = jjnr[jidx+7]; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - j_coord_offsetE = DIM*jnrE; - j_coord_offsetF = DIM*jnrF; - j_coord_offsetG = DIM*jnrG; - j_coord_offsetH = DIM*jnrH; - - /* load j atom coordinates */ - gmx_mm256_load_1rvec_8ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - x+j_coord_offsetE,x+j_coord_offsetF, - x+j_coord_offsetG,x+j_coord_offsetH, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm256_sub_ps(ix0,jx0); - dy00 = _mm256_sub_ps(iy0,jy0); - dz00 = _mm256_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm256_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = 
avx256_invsqrt_f(rsq00); - - rinvsq00 = _mm256_mul_ps(rinv00,rinv00); - - /* Load parameters for j particles */ - jq0 = gmx_mm256_load_8real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0, - charge+jnrE+0,charge+jnrF+0, - charge+jnrG+0,charge+jnrH+0); - isaj0 = gmx_mm256_load_8real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0, - invsqrta+jnrE+0,invsqrta+jnrF+0, - invsqrta+jnrG+0,invsqrta+jnrH+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - vdwjidx0E = 2*vdwtype[jnrE+0]; - vdwjidx0F = 2*vdwtype[jnrF+0]; - vdwjidx0G = 2*vdwtype[jnrG+0]; - vdwjidx0H = 2*vdwtype[jnrH+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm256_mul_ps(rsq00,rinv00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm256_mul_ps(iq0,jq0); - gmx_mm256_load_8pair_swizzle_ps(vdwioffsetptr0+vdwjidx0A, - vdwioffsetptr0+vdwjidx0B, - vdwioffsetptr0+vdwjidx0C, - vdwioffsetptr0+vdwjidx0D, - vdwioffsetptr0+vdwjidx0E, - vdwioffsetptr0+vdwjidx0F, - vdwioffsetptr0+vdwjidx0G, - vdwioffsetptr0+vdwjidx0H, - &c6_00,&c12_00); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm256_mul_ps(isai0,isaj0); - gbqqfactor = _mm256_xor_ps(signbit,_mm256_mul_ps(qq00,_mm256_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm256_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm256_mul_ps(r00,gbscale); - gbitab = _mm256_cvttps_epi32(rt); - gbeps = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR)); - /* AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... 
*/ - gbitab_lo = _mm256_extractf128_si256(gbitab,0x0); - gbitab_hi = _mm256_extractf128_si256(gbitab,0x1); - gbitab_lo = _mm_slli_epi32(gbitab_lo,2); - gbitab_hi = _mm_slli_epi32(gbitab_hi,2); - Y = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,0)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,0))); - F = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,1)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,1))); - G = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,2)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,2))); - H = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,3)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,3))); - GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H); - Heps = _mm256_mul_ps(gbeps,H); - Fp = _mm256_add_ps(F,_mm256_mul_ps(gbeps,_mm256_add_ps(G,Heps))); - VV = _mm256_add_ps(Y,_mm256_mul_ps(gbeps,Fp)); - vgb = _mm256_mul_ps(gbqqfactor,VV); - - FF = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps)))); - fgb = _mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale)); - dvdatmp = _mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r00))); - dvdasum = _mm256_add_ps(dvdasum,dvdatmp); - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - fjptrE = dvda+jnrE; - fjptrF = dvda+jnrF; - fjptrG = dvda+jnrG; - fjptrH = dvda+jnrH; - gmx_mm256_increment_8real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH, - _mm256_mul_ps(dvdatmp,_mm256_mul_ps(isaj0,isaj0))); - velec = _mm256_mul_ps(qq00,rinv00); - felec = _mm256_mul_ps(_mm256_sub_ps(_mm256_mul_ps(velec,rinv00),fgb),rinv00); - - /* LENNARD-JONES DISPERSION/REPULSION */ - - rinvsix = _mm256_mul_ps(_mm256_mul_ps(rinvsq00,rinvsq00),rinvsq00); - fvdw = _mm256_mul_ps(_mm256_sub_ps(_mm256_mul_ps(c12_00,rinvsix),c6_00),_mm256_mul_ps(rinvsix,rinvsq00)); - - fscal = _mm256_add_ps(felec,fvdw); - - /* Calculate temporary vectorial force */ - tx = 
_mm256_mul_ps(fscal,dx00); - ty = _mm256_mul_ps(fscal,dy00); - tz = _mm256_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm256_add_ps(fix0,tx); - fiy0 = _mm256_add_ps(fiy0,ty); - fiz0 = _mm256_add_ps(fiz0,tz); - - fjptrA = f+j_coord_offsetA; - fjptrB = f+j_coord_offsetB; - fjptrC = f+j_coord_offsetC; - fjptrD = f+j_coord_offsetD; - fjptrE = f+j_coord_offsetE; - fjptrF = f+j_coord_offsetF; - fjptrG = f+j_coord_offsetG; - fjptrH = f+j_coord_offsetH; - gmx_mm256_decrement_1rvec_8ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,tx,ty,tz); - - /* Inner loop uses 63 flops */ - } - - if(jidx=0) ? jnrlistA : 0; - jnrB = (jnrlistB>=0) ? jnrlistB : 0; - jnrC = (jnrlistC>=0) ? jnrlistC : 0; - jnrD = (jnrlistD>=0) ? jnrlistD : 0; - jnrE = (jnrlistE>=0) ? jnrlistE : 0; - jnrF = (jnrlistF>=0) ? jnrlistF : 0; - jnrG = (jnrlistG>=0) ? jnrlistG : 0; - jnrH = (jnrlistH>=0) ? jnrlistH : 0; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - j_coord_offsetE = DIM*jnrE; - j_coord_offsetF = DIM*jnrF; - j_coord_offsetG = DIM*jnrG; - j_coord_offsetH = DIM*jnrH; - - /* load j atom coordinates */ - gmx_mm256_load_1rvec_8ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - x+j_coord_offsetE,x+j_coord_offsetF, - x+j_coord_offsetG,x+j_coord_offsetH, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm256_sub_ps(ix0,jx0); - dy00 = _mm256_sub_ps(iy0,jy0); - dz00 = _mm256_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm256_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = avx256_invsqrt_f(rsq00); - - rinvsq00 = _mm256_mul_ps(rinv00,rinv00); - - /* Load parameters for j particles */ - jq0 = gmx_mm256_load_8real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0, - charge+jnrE+0,charge+jnrF+0, - charge+jnrG+0,charge+jnrH+0); - isaj0 = 
gmx_mm256_load_8real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0, - invsqrta+jnrE+0,invsqrta+jnrF+0, - invsqrta+jnrG+0,invsqrta+jnrH+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - vdwjidx0E = 2*vdwtype[jnrE+0]; - vdwjidx0F = 2*vdwtype[jnrF+0]; - vdwjidx0G = 2*vdwtype[jnrG+0]; - vdwjidx0H = 2*vdwtype[jnrH+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm256_mul_ps(rsq00,rinv00); - r00 = _mm256_andnot_ps(dummy_mask,r00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm256_mul_ps(iq0,jq0); - gmx_mm256_load_8pair_swizzle_ps(vdwioffsetptr0+vdwjidx0A, - vdwioffsetptr0+vdwjidx0B, - vdwioffsetptr0+vdwjidx0C, - vdwioffsetptr0+vdwjidx0D, - vdwioffsetptr0+vdwjidx0E, - vdwioffsetptr0+vdwjidx0F, - vdwioffsetptr0+vdwjidx0G, - vdwioffsetptr0+vdwjidx0H, - &c6_00,&c12_00); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm256_mul_ps(isai0,isaj0); - gbqqfactor = _mm256_xor_ps(signbit,_mm256_mul_ps(qq00,_mm256_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm256_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm256_mul_ps(r00,gbscale); - gbitab = _mm256_cvttps_epi32(rt); - gbeps = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR)); - /* AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... 
*/ - gbitab_lo = _mm256_extractf128_si256(gbitab,0x0); - gbitab_hi = _mm256_extractf128_si256(gbitab,0x1); - gbitab_lo = _mm_slli_epi32(gbitab_lo,2); - gbitab_hi = _mm_slli_epi32(gbitab_hi,2); - Y = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,0)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,0))); - F = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,1)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,1))); - G = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,2)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,2))); - H = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,3)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,3))); - GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H); - Heps = _mm256_mul_ps(gbeps,H); - Fp = _mm256_add_ps(F,_mm256_mul_ps(gbeps,_mm256_add_ps(G,Heps))); - VV = _mm256_add_ps(Y,_mm256_mul_ps(gbeps,Fp)); - vgb = _mm256_mul_ps(gbqqfactor,VV); - - FF = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps)))); - fgb = _mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale)); - dvdatmp = _mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r00))); - dvdatmp = _mm256_andnot_ps(dummy_mask,dvdatmp); - dvdasum = _mm256_add_ps(dvdasum,dvdatmp); - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? dvda+jnrD : scratch; - fjptrE = (jnrlistE>=0) ? dvda+jnrE : scratch; - fjptrF = (jnrlistF>=0) ? dvda+jnrF : scratch; - fjptrG = (jnrlistG>=0) ? dvda+jnrG : scratch; - fjptrH = (jnrlistH>=0) ? 
dvda+jnrH : scratch; - gmx_mm256_increment_8real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH, - _mm256_mul_ps(dvdatmp,_mm256_mul_ps(isaj0,isaj0))); - velec = _mm256_mul_ps(qq00,rinv00); - felec = _mm256_mul_ps(_mm256_sub_ps(_mm256_mul_ps(velec,rinv00),fgb),rinv00); - - /* LENNARD-JONES DISPERSION/REPULSION */ - - rinvsix = _mm256_mul_ps(_mm256_mul_ps(rinvsq00,rinvsq00),rinvsq00); - fvdw = _mm256_mul_ps(_mm256_sub_ps(_mm256_mul_ps(c12_00,rinvsix),c6_00),_mm256_mul_ps(rinvsix,rinvsq00)); - - fscal = _mm256_add_ps(felec,fvdw); - - fscal = _mm256_andnot_ps(dummy_mask,fscal); - - /* Calculate temporary vectorial force */ - tx = _mm256_mul_ps(fscal,dx00); - ty = _mm256_mul_ps(fscal,dy00); - tz = _mm256_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm256_add_ps(fix0,tx); - fiy0 = _mm256_add_ps(fiy0,ty); - fiz0 = _mm256_add_ps(fiz0,tz); - - fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch; - fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch; - fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch; - fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch; - fjptrE = (jnrlistE>=0) ? f+j_coord_offsetE : scratch; - fjptrF = (jnrlistF>=0) ? f+j_coord_offsetF : scratch; - fjptrG = (jnrlistG>=0) ? f+j_coord_offsetG : scratch; - fjptrH = (jnrlistH>=0) ? 
f+j_coord_offsetH : scratch; - gmx_mm256_decrement_1rvec_8ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,tx,ty,tz); - - /* Inner loop uses 64 flops */ - } - - /* End of innermost loop */ - - gmx_mm256_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0, - f+i_coord_offset,fshift+i_shift_offset); - - dvdasum = _mm256_mul_ps(dvdasum, _mm256_mul_ps(isai0,isai0)); - gmx_mm256_update_1pot_ps(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 7 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*64); -} diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_256_single.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_256_single.c deleted file mode 100644 index 3c4db3bb86..0000000000 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_256_single.c +++ /dev/null @@ -1,911 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/* - * Note: this file was generated by the GROMACS avx_256_single kernel generator. - */ -#include "gmxpre.h" - -#include "config.h" - -#include - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_x86_avx_256_single.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_256_single - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_256_single - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B,C,D,E,F,G,H refer to j loop unrolling done with AVX, e.g. 
for the eight different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB,jnrC,jnrD; - int jnrE,jnrF,jnrG,jnrH; - int jnrlistA,jnrlistB,jnrlistC,jnrlistD; - int jnrlistE,jnrlistF,jnrlistG,jnrlistH; - int j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD; - int j_coord_offsetE,j_coord_offsetF,j_coord_offsetG,j_coord_offsetH; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - real *fjptrA,*fjptrB,*fjptrC,*fjptrD,*fjptrE,*fjptrF,*fjptrG,*fjptrH; - real scratch[4*DIM]; - __m256 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - real * vdwioffsetptr0; - __m256 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D,vdwjidx0E,vdwjidx0F,vdwjidx0G,vdwjidx0H; - __m256 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m256 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m256 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m256i gbitab; - __m128i gbitab_lo,gbitab_hi; - __m256 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp; - __m256 minushalf = _mm256_set1_ps(-0.5); - real *invsqrta,*dvda,*gbtab; - __m256i vfitab; - __m128i vfitab_lo,vfitab_hi; - __m128i ifour = _mm_set1_epi32(4); - __m256 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m256 dummy_mask,cutoff_mask; - __m256 signbit = _mm256_castsi256_ps( _mm256_set1_epi32(0x80000000) ); - __m256 one = _mm256_set1_ps(1.0); - __m256 two = _mm256_set1_ps(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm256_set1_ps(fr->ic->epsfac); - charge = mdatoms->chargeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale 
= _mm256_set1_ps(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm256_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = jnrC = jnrD = jnrE = jnrF = jnrG = jnrH = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - j_coord_offsetC = 0; - j_coord_offsetD = 0; - j_coord_offsetE = 0; - j_coord_offsetF = 0; - j_coord_offsetG = 0; - j_coord_offsetH = 0; - - outeriter = 0; - inneriter = 0; - - for(iidx=0;iidx<4*DIM;iidx++) - { - scratch[iidx] = 0.0; - } - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx=0; jidx+=8) - { - - /* Get j neighbor index, and coordinate index */ - jnrA = jjnr[jidx]; - jnrB = jjnr[jidx+1]; - jnrC = jjnr[jidx+2]; - jnrD = jjnr[jidx+3]; - jnrE = jjnr[jidx+4]; - jnrF = jjnr[jidx+5]; - jnrG = jjnr[jidx+6]; - jnrH = jjnr[jidx+7]; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - j_coord_offsetE = DIM*jnrE; - j_coord_offsetF = DIM*jnrF; - j_coord_offsetG = DIM*jnrG; - j_coord_offsetH = DIM*jnrH; - - /* load j atom coordinates */ - gmx_mm256_load_1rvec_8ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - x+j_coord_offsetE,x+j_coord_offsetF, - x+j_coord_offsetG,x+j_coord_offsetH, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm256_sub_ps(ix0,jx0); - dy00 = _mm256_sub_ps(iy0,jy0); - dz00 = _mm256_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm256_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = avx256_invsqrt_f(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm256_load_8real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0, - charge+jnrE+0,charge+jnrF+0, - charge+jnrG+0,charge+jnrH+0); - isaj0 = gmx_mm256_load_8real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0, - invsqrta+jnrE+0,invsqrta+jnrF+0, - 
invsqrta+jnrG+0,invsqrta+jnrH+0); - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm256_mul_ps(rsq00,rinv00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm256_mul_ps(iq0,jq0); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm256_mul_ps(isai0,isaj0); - gbqqfactor = _mm256_xor_ps(signbit,_mm256_mul_ps(qq00,_mm256_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm256_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm256_mul_ps(r00,gbscale); - gbitab = _mm256_cvttps_epi32(rt); - gbeps = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR)); - /* AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... */ - gbitab_lo = _mm256_extractf128_si256(gbitab,0x0); - gbitab_hi = _mm256_extractf128_si256(gbitab,0x1); - gbitab_lo = _mm_slli_epi32(gbitab_lo,2); - gbitab_hi = _mm_slli_epi32(gbitab_hi,2); - Y = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,0)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,0))); - F = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,1)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,1))); - G = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,2)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,2))); - H = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,3)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,3))); - GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H); - Heps = _mm256_mul_ps(gbeps,H); - Fp = _mm256_add_ps(F,_mm256_mul_ps(gbeps,_mm256_add_ps(G,Heps))); - VV = _mm256_add_ps(Y,_mm256_mul_ps(gbeps,Fp)); - vgb = _mm256_mul_ps(gbqqfactor,VV); - - FF = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps)))); - fgb = 
_mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale)); - dvdatmp = _mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r00))); - dvdasum = _mm256_add_ps(dvdasum,dvdatmp); - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - fjptrE = dvda+jnrE; - fjptrF = dvda+jnrF; - fjptrG = dvda+jnrG; - fjptrH = dvda+jnrH; - gmx_mm256_increment_8real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH, - _mm256_mul_ps(dvdatmp,_mm256_mul_ps(isaj0,isaj0))); - velec = _mm256_mul_ps(qq00,rinv00); - felec = _mm256_mul_ps(_mm256_sub_ps(_mm256_mul_ps(velec,rinv00),fgb),rinv00); - - /* Update potential sum for this i atom from the interaction with this j atom. */ - velecsum = _mm256_add_ps(velecsum,velec); - vgbsum = _mm256_add_ps(vgbsum,vgb); - - fscal = felec; - - /* Calculate temporary vectorial force */ - tx = _mm256_mul_ps(fscal,dx00); - ty = _mm256_mul_ps(fscal,dy00); - tz = _mm256_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm256_add_ps(fix0,tx); - fiy0 = _mm256_add_ps(fiy0,ty); - fiz0 = _mm256_add_ps(fiz0,tz); - - fjptrA = f+j_coord_offsetA; - fjptrB = f+j_coord_offsetB; - fjptrC = f+j_coord_offsetC; - fjptrD = f+j_coord_offsetD; - fjptrE = f+j_coord_offsetE; - fjptrF = f+j_coord_offsetF; - fjptrG = f+j_coord_offsetG; - fjptrH = f+j_coord_offsetH; - gmx_mm256_decrement_1rvec_8ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,tx,ty,tz); - - /* Inner loop uses 57 flops */ - } - - if(jidx=0) ? jnrlistA : 0; - jnrB = (jnrlistB>=0) ? jnrlistB : 0; - jnrC = (jnrlistC>=0) ? jnrlistC : 0; - jnrD = (jnrlistD>=0) ? jnrlistD : 0; - jnrE = (jnrlistE>=0) ? jnrlistE : 0; - jnrF = (jnrlistF>=0) ? jnrlistF : 0; - jnrG = (jnrlistG>=0) ? jnrlistG : 0; - jnrH = (jnrlistH>=0) ? 
jnrlistH : 0; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - j_coord_offsetE = DIM*jnrE; - j_coord_offsetF = DIM*jnrF; - j_coord_offsetG = DIM*jnrG; - j_coord_offsetH = DIM*jnrH; - - /* load j atom coordinates */ - gmx_mm256_load_1rvec_8ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - x+j_coord_offsetE,x+j_coord_offsetF, - x+j_coord_offsetG,x+j_coord_offsetH, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm256_sub_ps(ix0,jx0); - dy00 = _mm256_sub_ps(iy0,jy0); - dz00 = _mm256_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm256_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = avx256_invsqrt_f(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm256_load_8real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0, - charge+jnrE+0,charge+jnrF+0, - charge+jnrG+0,charge+jnrH+0); - isaj0 = gmx_mm256_load_8real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0, - invsqrta+jnrE+0,invsqrta+jnrF+0, - invsqrta+jnrG+0,invsqrta+jnrH+0); - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm256_mul_ps(rsq00,rinv00); - r00 = _mm256_andnot_ps(dummy_mask,r00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm256_mul_ps(iq0,jq0); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm256_mul_ps(isai0,isaj0); - gbqqfactor = _mm256_xor_ps(signbit,_mm256_mul_ps(qq00,_mm256_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm256_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. 
- */ - rt = _mm256_mul_ps(r00,gbscale); - gbitab = _mm256_cvttps_epi32(rt); - gbeps = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR)); - /* AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... */ - gbitab_lo = _mm256_extractf128_si256(gbitab,0x0); - gbitab_hi = _mm256_extractf128_si256(gbitab,0x1); - gbitab_lo = _mm_slli_epi32(gbitab_lo,2); - gbitab_hi = _mm_slli_epi32(gbitab_hi,2); - Y = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,0)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,0))); - F = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,1)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,1))); - G = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,2)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,2))); - H = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,3)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,3))); - GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H); - Heps = _mm256_mul_ps(gbeps,H); - Fp = _mm256_add_ps(F,_mm256_mul_ps(gbeps,_mm256_add_ps(G,Heps))); - VV = _mm256_add_ps(Y,_mm256_mul_ps(gbeps,Fp)); - vgb = _mm256_mul_ps(gbqqfactor,VV); - - FF = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps)))); - fgb = _mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale)); - dvdatmp = _mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r00))); - dvdatmp = _mm256_andnot_ps(dummy_mask,dvdatmp); - dvdasum = _mm256_add_ps(dvdasum,dvdatmp); - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? dvda+jnrD : scratch; - fjptrE = (jnrlistE>=0) ? dvda+jnrE : scratch; - fjptrF = (jnrlistF>=0) ? dvda+jnrF : scratch; - fjptrG = (jnrlistG>=0) ? 
dvda+jnrG : scratch; - fjptrH = (jnrlistH>=0) ? dvda+jnrH : scratch; - gmx_mm256_increment_8real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH, - _mm256_mul_ps(dvdatmp,_mm256_mul_ps(isaj0,isaj0))); - velec = _mm256_mul_ps(qq00,rinv00); - felec = _mm256_mul_ps(_mm256_sub_ps(_mm256_mul_ps(velec,rinv00),fgb),rinv00); - - /* Update potential sum for this i atom from the interaction with this j atom. */ - velec = _mm256_andnot_ps(dummy_mask,velec); - velecsum = _mm256_add_ps(velecsum,velec); - vgb = _mm256_andnot_ps(dummy_mask,vgb); - vgbsum = _mm256_add_ps(vgbsum,vgb); - - fscal = felec; - - fscal = _mm256_andnot_ps(dummy_mask,fscal); - - /* Calculate temporary vectorial force */ - tx = _mm256_mul_ps(fscal,dx00); - ty = _mm256_mul_ps(fscal,dy00); - tz = _mm256_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm256_add_ps(fix0,tx); - fiy0 = _mm256_add_ps(fiy0,ty); - fiz0 = _mm256_add_ps(fiz0,tz); - - fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch; - fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch; - fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch; - fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch; - fjptrE = (jnrlistE>=0) ? f+j_coord_offsetE : scratch; - fjptrF = (jnrlistF>=0) ? f+j_coord_offsetF : scratch; - fjptrG = (jnrlistG>=0) ? f+j_coord_offsetG : scratch; - fjptrH = (jnrlistH>=0) ? 
f+j_coord_offsetH : scratch; - gmx_mm256_decrement_1rvec_8ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,tx,ty,tz); - - /* Inner loop uses 58 flops */ - } - - /* End of innermost loop */ - - gmx_mm256_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0, - f+i_coord_offset,fshift+i_shift_offset); - - ggid = gid[iidx]; - /* Update potential energies */ - gmx_mm256_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid); - gmx_mm256_update_1pot_ps(vgbsum,kernel_data->energygrp_polarization+ggid); - dvdasum = _mm256_mul_ps(dvdasum, _mm256_mul_ps(isai0,isai0)); - gmx_mm256_update_1pot_ps(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 9 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*9 + inneriter*58); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_256_single - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_256_single - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B,C,D,E,F,G,H refer to j loop unrolling done with AVX, e.g. for the eight different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB,jnrC,jnrD; - int jnrE,jnrF,jnrG,jnrH; - int jnrlistA,jnrlistB,jnrlistC,jnrlistD; - int jnrlistE,jnrlistF,jnrlistG,jnrlistH; - int j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD; - int j_coord_offsetE,j_coord_offsetF,j_coord_offsetG,j_coord_offsetH; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - real *fjptrA,*fjptrB,*fjptrC,*fjptrD,*fjptrE,*fjptrF,*fjptrG,*fjptrH; - real scratch[4*DIM]; - __m256 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - real * vdwioffsetptr0; - __m256 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D,vdwjidx0E,vdwjidx0F,vdwjidx0G,vdwjidx0H; - __m256 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m256 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m256 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m256i gbitab; - __m128i gbitab_lo,gbitab_hi; - __m256 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp; - __m256 minushalf = _mm256_set1_ps(-0.5); - real *invsqrta,*dvda,*gbtab; - __m256i vfitab; - __m128i vfitab_lo,vfitab_hi; - __m128i ifour = _mm_set1_epi32(4); - __m256 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m256 dummy_mask,cutoff_mask; - __m256 signbit = _mm256_castsi256_ps( _mm256_set1_epi32(0x80000000) ); - __m256 one = _mm256_set1_ps(1.0); - __m256 two = _mm256_set1_ps(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm256_set1_ps(fr->ic->epsfac); - charge = mdatoms->chargeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm256_set1_ps(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = 
_mm256_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = jnrC = jnrD = jnrE = jnrF = jnrG = jnrH = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - j_coord_offsetC = 0; - j_coord_offsetD = 0; - j_coord_offsetE = 0; - j_coord_offsetF = 0; - j_coord_offsetG = 0; - j_coord_offsetH = 0; - - outeriter = 0; - inneriter = 0; - - for(iidx=0;iidx<4*DIM;iidx++) - { - scratch[iidx] = 0.0; - } - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx=0; jidx+=8) - { - - /* Get j neighbor index, and coordinate index */ - jnrA = jjnr[jidx]; - jnrB = jjnr[jidx+1]; - jnrC = jjnr[jidx+2]; - jnrD = jjnr[jidx+3]; - jnrE = jjnr[jidx+4]; - jnrF = jjnr[jidx+5]; - jnrG = jjnr[jidx+6]; - jnrH = jjnr[jidx+7]; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - j_coord_offsetE = DIM*jnrE; - j_coord_offsetF = DIM*jnrF; - j_coord_offsetG = DIM*jnrG; - j_coord_offsetH = DIM*jnrH; - - /* load j atom coordinates */ - gmx_mm256_load_1rvec_8ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - x+j_coord_offsetE,x+j_coord_offsetF, - x+j_coord_offsetG,x+j_coord_offsetH, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm256_sub_ps(ix0,jx0); - dy00 = _mm256_sub_ps(iy0,jy0); - dz00 = _mm256_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm256_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = avx256_invsqrt_f(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm256_load_8real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0, - charge+jnrE+0,charge+jnrF+0, - charge+jnrG+0,charge+jnrH+0); - isaj0 = gmx_mm256_load_8real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0, - invsqrta+jnrE+0,invsqrta+jnrF+0, - invsqrta+jnrG+0,invsqrta+jnrH+0); - - /************************** - * CALCULATE INTERACTIONS * - 
**************************/ - - r00 = _mm256_mul_ps(rsq00,rinv00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm256_mul_ps(iq0,jq0); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm256_mul_ps(isai0,isaj0); - gbqqfactor = _mm256_xor_ps(signbit,_mm256_mul_ps(qq00,_mm256_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm256_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm256_mul_ps(r00,gbscale); - gbitab = _mm256_cvttps_epi32(rt); - gbeps = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR)); - /* AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... */ - gbitab_lo = _mm256_extractf128_si256(gbitab,0x0); - gbitab_hi = _mm256_extractf128_si256(gbitab,0x1); - gbitab_lo = _mm_slli_epi32(gbitab_lo,2); - gbitab_hi = _mm_slli_epi32(gbitab_hi,2); - Y = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,0)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,0))); - F = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,1)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,1))); - G = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,2)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,2))); - H = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,3)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,3))); - GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H); - Heps = _mm256_mul_ps(gbeps,H); - Fp = _mm256_add_ps(F,_mm256_mul_ps(gbeps,_mm256_add_ps(G,Heps))); - VV = _mm256_add_ps(Y,_mm256_mul_ps(gbeps,Fp)); - vgb = _mm256_mul_ps(gbqqfactor,VV); - - FF = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps)))); - fgb = _mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale)); - dvdatmp = 
_mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r00))); - dvdasum = _mm256_add_ps(dvdasum,dvdatmp); - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - fjptrE = dvda+jnrE; - fjptrF = dvda+jnrF; - fjptrG = dvda+jnrG; - fjptrH = dvda+jnrH; - gmx_mm256_increment_8real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH, - _mm256_mul_ps(dvdatmp,_mm256_mul_ps(isaj0,isaj0))); - velec = _mm256_mul_ps(qq00,rinv00); - felec = _mm256_mul_ps(_mm256_sub_ps(_mm256_mul_ps(velec,rinv00),fgb),rinv00); - - fscal = felec; - - /* Calculate temporary vectorial force */ - tx = _mm256_mul_ps(fscal,dx00); - ty = _mm256_mul_ps(fscal,dy00); - tz = _mm256_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm256_add_ps(fix0,tx); - fiy0 = _mm256_add_ps(fiy0,ty); - fiz0 = _mm256_add_ps(fiz0,tz); - - fjptrA = f+j_coord_offsetA; - fjptrB = f+j_coord_offsetB; - fjptrC = f+j_coord_offsetC; - fjptrD = f+j_coord_offsetD; - fjptrE = f+j_coord_offsetE; - fjptrF = f+j_coord_offsetF; - fjptrG = f+j_coord_offsetG; - fjptrH = f+j_coord_offsetH; - gmx_mm256_decrement_1rvec_8ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,tx,ty,tz); - - /* Inner loop uses 55 flops */ - } - - if(jidx=0) ? jnrlistA : 0; - jnrB = (jnrlistB>=0) ? jnrlistB : 0; - jnrC = (jnrlistC>=0) ? jnrlistC : 0; - jnrD = (jnrlistD>=0) ? jnrlistD : 0; - jnrE = (jnrlistE>=0) ? jnrlistE : 0; - jnrF = (jnrlistF>=0) ? jnrlistF : 0; - jnrG = (jnrlistG>=0) ? jnrlistG : 0; - jnrH = (jnrlistH>=0) ? 
jnrlistH : 0; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - j_coord_offsetE = DIM*jnrE; - j_coord_offsetF = DIM*jnrF; - j_coord_offsetG = DIM*jnrG; - j_coord_offsetH = DIM*jnrH; - - /* load j atom coordinates */ - gmx_mm256_load_1rvec_8ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - x+j_coord_offsetE,x+j_coord_offsetF, - x+j_coord_offsetG,x+j_coord_offsetH, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm256_sub_ps(ix0,jx0); - dy00 = _mm256_sub_ps(iy0,jy0); - dz00 = _mm256_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm256_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = avx256_invsqrt_f(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm256_load_8real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0, - charge+jnrE+0,charge+jnrF+0, - charge+jnrG+0,charge+jnrH+0); - isaj0 = gmx_mm256_load_8real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0, - invsqrta+jnrE+0,invsqrta+jnrF+0, - invsqrta+jnrG+0,invsqrta+jnrH+0); - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm256_mul_ps(rsq00,rinv00); - r00 = _mm256_andnot_ps(dummy_mask,r00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm256_mul_ps(iq0,jq0); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm256_mul_ps(isai0,isaj0); - gbqqfactor = _mm256_xor_ps(signbit,_mm256_mul_ps(qq00,_mm256_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm256_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. 
- */ - rt = _mm256_mul_ps(r00,gbscale); - gbitab = _mm256_cvttps_epi32(rt); - gbeps = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR)); - /* AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... */ - gbitab_lo = _mm256_extractf128_si256(gbitab,0x0); - gbitab_hi = _mm256_extractf128_si256(gbitab,0x1); - gbitab_lo = _mm_slli_epi32(gbitab_lo,2); - gbitab_hi = _mm_slli_epi32(gbitab_hi,2); - Y = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,0)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,0))); - F = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,1)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,1))); - G = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,2)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,2))); - H = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,3)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,3))); - GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H); - Heps = _mm256_mul_ps(gbeps,H); - Fp = _mm256_add_ps(F,_mm256_mul_ps(gbeps,_mm256_add_ps(G,Heps))); - VV = _mm256_add_ps(Y,_mm256_mul_ps(gbeps,Fp)); - vgb = _mm256_mul_ps(gbqqfactor,VV); - - FF = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps)))); - fgb = _mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale)); - dvdatmp = _mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r00))); - dvdatmp = _mm256_andnot_ps(dummy_mask,dvdatmp); - dvdasum = _mm256_add_ps(dvdasum,dvdatmp); - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? dvda+jnrD : scratch; - fjptrE = (jnrlistE>=0) ? dvda+jnrE : scratch; - fjptrF = (jnrlistF>=0) ? dvda+jnrF : scratch; - fjptrG = (jnrlistG>=0) ? 
dvda+jnrG : scratch; - fjptrH = (jnrlistH>=0) ? dvda+jnrH : scratch; - gmx_mm256_increment_8real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH, - _mm256_mul_ps(dvdatmp,_mm256_mul_ps(isaj0,isaj0))); - velec = _mm256_mul_ps(qq00,rinv00); - felec = _mm256_mul_ps(_mm256_sub_ps(_mm256_mul_ps(velec,rinv00),fgb),rinv00); - - fscal = felec; - - fscal = _mm256_andnot_ps(dummy_mask,fscal); - - /* Calculate temporary vectorial force */ - tx = _mm256_mul_ps(fscal,dx00); - ty = _mm256_mul_ps(fscal,dy00); - tz = _mm256_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm256_add_ps(fix0,tx); - fiy0 = _mm256_add_ps(fiy0,ty); - fiz0 = _mm256_add_ps(fiz0,tz); - - fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch; - fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch; - fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch; - fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch; - fjptrE = (jnrlistE>=0) ? f+j_coord_offsetE : scratch; - fjptrF = (jnrlistF>=0) ? f+j_coord_offsetF : scratch; - fjptrG = (jnrlistG>=0) ? f+j_coord_offsetG : scratch; - fjptrH = (jnrlistH>=0) ? 
f+j_coord_offsetH : scratch; - gmx_mm256_decrement_1rvec_8ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,tx,ty,tz); - - /* Inner loop uses 56 flops */ - } - - /* End of innermost loop */ - - gmx_mm256_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0, - f+i_coord_offset,fshift+i_shift_offset); - - dvdasum = _mm256_mul_ps(dvdasum, _mm256_mul_ps(isai0,isai0)); - gmx_mm256_update_1pot_ps(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 7 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*7 + inneriter*56); -} diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_avx_256_single.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_avx_256_single.c index b4debc8f40..5a70e18520 100644 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_avx_256_single.c +++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_avx_256_single.c @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -51,6 +51,36 @@ nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_avx_256_single; nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_256_single; nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_256_single; nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_256_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_256_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_256_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_256_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_256_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_256_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_256_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_256_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_256_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_256_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_256_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_256_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_256_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_256_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_256_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_256_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_256_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_256_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_256_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_256_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_256_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_256_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_256_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_256_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_256_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_256_single; +nb_kernel_t 
nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_256_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_256_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_256_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_256_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_256_single; nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_avx_256_single; nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_avx_256_single; nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_avx_256_single; @@ -141,72 +171,6 @@ nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_avx_256_single; nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_256_single; nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_256_single; nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_256_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_256_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_256_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_256_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_256_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_256_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_256_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_256_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_256_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_256_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_256_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_256_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_256_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_256_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_256_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_256_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_256_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_256_single; -nb_kernel_t 
nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_256_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_256_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_256_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_256_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_256_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_256_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_256_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_256_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_256_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_256_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_256_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_256_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_256_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_256_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_256_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_256_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_256_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_256_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_256_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_256_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_256_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_256_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_256_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_256_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_256_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_256_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_256_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_256_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_256_single; -nb_kernel_t 
nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_256_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_256_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_256_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_256_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_256_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_256_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_256_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_256_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_256_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_256_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_256_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_256_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_256_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_256_single; -nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_256_single; -nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_256_single; -nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_256_single; -nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_256_single; -nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_256_single; -nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_256_single; nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_256_single; nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_256_single; nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_256_single; @@ -277,6 +241,36 @@ nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_256_single; nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_256_single; nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_256_single; nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_256_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_256_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_256_single; +nb_kernel_t 
nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_256_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_256_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_256_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_256_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_256_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_256_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_256_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_256_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_256_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_256_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_256_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_256_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_256_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_256_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_256_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_256_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_256_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_256_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_256_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_256_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_256_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_256_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_256_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_256_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_256_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_256_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_256_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_256_single; nb_kernel_info_t @@ -294,6 +288,36 @@ nb_kernel_info_t { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_256_single, 
"nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_256_single", "avx_256_single", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" }, { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_256_single", "avx_256_single", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_256_single, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_256_single", "avx_256_single", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_256_single, 
"nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" }, + { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_256_single, 
"nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" }, + { 
nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }, { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_avx_256_single", "avx_256_single", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "PotentialAndForce" }, { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_avx_256_single, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_avx_256_single", "avx_256_single", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "Force" }, { nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_avx_256_single", "avx_256_single", "Ewald", "None", "LJEwald", "None", "Water3Particle", "", "PotentialAndForce" }, @@ 
-384,72 +408,6 @@ nb_kernel_info_t { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_256_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_256_single", "avx_256_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "Force" }, { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_256_single", "avx_256_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "PotentialAndForce" }, { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_256_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_256_single", "avx_256_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_256_single, 
"nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", 
"Water3Water3", "", "Force" }, - { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" }, - { 
nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" }, - { 
nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" }, - { 
nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, - { 
nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_256_single", "avx_256_single", "GeneralizedBorn", "None", 
"LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_256_single, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_256_single", "avx_256_single", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_256_single", "avx_256_single", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_256_single, "nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_256_single", "avx_256_single", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_256_single", "avx_256_single", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_256_single, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_256_single", "avx_256_single", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" }, { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_256_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" }, { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" }, @@ -519,7 +477,37 @@ nb_kernel_info_t { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_256_single, 
"nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_256_single", "avx_256_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" }, { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_256_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_256_single", "avx_256_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" }, { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_256_single", "avx_256_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_256_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_256_single", "avx_256_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" } + { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_256_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_256_single", "avx_256_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }, + { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_256_single", 
"avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" }, + { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" }, + { 
nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_256_single, 
"nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" } }; int diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_template_avx_256_single.pre b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_template_avx_256_single.pre index 837fce7857..d4c8e38f35 100644 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_template_avx_256_single.pre +++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_template_avx_256_single.pre @@ -2,7 +2,7 @@ /* * This file is part of the GROMACS molecular simulation package. 
* - * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -140,13 +140,6 @@ void __m256 velec,felec,velecsum,facel,crf,krf,krf2; real *charge; /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC */ - __m256i gbitab; - __m128i gbitab_lo,gbitab_hi; - __m256 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp; - __m256 minushalf = _mm256_set1_ps(-0.5); - real *invsqrta,*dvda,*gbtab; - /* #endif */ /* #if KERNEL_VDW != 'None' */ int nvdwtype; __m256 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; @@ -155,7 +148,7 @@ void __m256 one_sixth = _mm256_set1_ps(1.0/6.0); __m256 one_twelfth = _mm256_set1_ps(1.0/12.0); /* #endif */ - /* #if 'Table' in KERNEL_ELEC or 'GeneralizedBorn' in KERNEL_ELEC or 'Table' in KERNEL_VDW */ + /* #if 'Table' in KERNEL_ELEC or 'Table' in KERNEL_VDW */ __m256i vfitab; __m128i vfitab_lo,vfitab_hi; __m128i ifour = _mm_set1_epi32(4); @@ -246,14 +239,6 @@ void /* #endif */ /* #endif */ - /* #if KERNEL_ELEC=='GeneralizedBorn' */ - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm256_set1_ps(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm256_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - /* #endif */ - /* #if 'Water' in GEOMETRY_I */ /* Setup water-specific parameters */ inr = nlist->iinr[0]; @@ -394,9 +379,6 @@ void /* #for I in PARTICLES_ELEC_I */ iq{I} = _mm256_mul_ps(facel,_mm256_set1_ps(charge[inr+{I}])); /* #define OUTERFLOPS OUTERFLOPS+1 */ - /* #if KERNEL_ELEC=='GeneralizedBorn' */ - isai{I} = _mm256_set1_ps(invsqrta[inr+{I}]); - /* #endif */ /* #endfor */ /* #for I in PARTICLES_VDW_I */ vdwioffsetptr{I} = 
vdwparam+2*nvdwtype*vdwtype[inr+{I}]; @@ -411,16 +393,10 @@ void /* #if KERNEL_ELEC != 'None' */ velecsum = _mm256_setzero_ps(); /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC */ - vgbsum = _mm256_setzero_ps(); - /* #endif */ /* #if KERNEL_VDW != 'None' */ vvdwsum = _mm256_setzero_ps(); /* #endif */ /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */ - dvdasum = _mm256_setzero_ps(); - /* #endif */ /* #for ROUND in ['Loop','Epilogue'] */ @@ -553,12 +529,6 @@ void charge+jnrC+{J},charge+jnrD+{J}, charge+jnrE+{J},charge+jnrF+{J}, charge+jnrG+{J},charge+jnrH+{J}); - /* #if KERNEL_ELEC=='GeneralizedBorn' */ - isaj{J} = gmx_mm256_load_8real_swizzle_ps(invsqrta+jnrA+{J},invsqrta+jnrB+{J}, - invsqrta+jnrC+{J},invsqrta+jnrD+{J}, - invsqrta+jnrE+{J},invsqrta+jnrF+{J}, - invsqrta+jnrG+{J},invsqrta+jnrH+{J}); - /* #endif */ /* #endfor */ /* #for J in PARTICLES_VDW_J */ vdwjidx{J}A = 2*vdwtype[jnrA+{J}]; @@ -686,79 +656,6 @@ void /* #define INNERFLOPS INNERFLOPS+3 */ /* #endif */ - /* #elif KERNEL_ELEC=='GeneralizedBorn' */ - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm256_mul_ps(isai{I},isaj{J}); - gbqqfactor = _mm256_xor_ps(signbit,_mm256_mul_ps(qq{I}{J},_mm256_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm256_mul_ps(isaprod,gbtabscale); - /* #define INNERFLOPS INNERFLOPS+5 */ - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm256_mul_ps(r{I}{J},gbscale); - gbitab = _mm256_cvttps_epi32(rt); - gbeps = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR)); - /* AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... 
*/ - gbitab_lo = _mm256_extractf128_si256(gbitab,0x0); - gbitab_hi = _mm256_extractf128_si256(gbitab,0x1); - gbitab_lo = _mm_slli_epi32(gbitab_lo,2); - gbitab_hi = _mm_slli_epi32(gbitab_hi,2); - Y = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,0)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,0))); - F = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,1)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,1))); - G = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,2)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,2))); - H = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,3)), - _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,3))); - GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H); - Heps = _mm256_mul_ps(gbeps,H); - Fp = _mm256_add_ps(F,_mm256_mul_ps(gbeps,_mm256_add_ps(G,Heps))); - VV = _mm256_add_ps(Y,_mm256_mul_ps(gbeps,Fp)); - vgb = _mm256_mul_ps(gbqqfactor,VV); - /* #define INNERFLOPS INNERFLOPS+10 */ - - /* #if 'Force' in KERNEL_VF */ - FF = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps)))); - fgb = _mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale)); - dvdatmp = _mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r{I}{J}))); - /* #if ROUND == 'Epilogue' */ - dvdatmp = _mm256_andnot_ps(dummy_mask,dvdatmp); - /* #endif */ - dvdasum = _mm256_add_ps(dvdasum,dvdatmp); - /* #if ROUND == 'Loop' */ - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - fjptrE = dvda+jnrE; - fjptrF = dvda+jnrF; - fjptrG = dvda+jnrG; - fjptrH = dvda+jnrH; - /* #else */ - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? dvda+jnrD : scratch; - fjptrE = (jnrlistE>=0) ? 
dvda+jnrE : scratch; - fjptrF = (jnrlistF>=0) ? dvda+jnrF : scratch; - fjptrG = (jnrlistG>=0) ? dvda+jnrG : scratch; - fjptrH = (jnrlistH>=0) ? dvda+jnrH : scratch; - /* #endif */ - gmx_mm256_increment_8real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH, - _mm256_mul_ps(dvdatmp,_mm256_mul_ps(isaj{J},isaj{J}))); - /* #define INNERFLOPS INNERFLOPS+12 */ - /* #endif */ - velec = _mm256_mul_ps(qq{I}{J},rinv{I}{J}); - /* #define INNERFLOPS INNERFLOPS+1 */ - /* #if 'Force' in KERNEL_VF */ - felec = _mm256_mul_ps(_mm256_sub_ps(_mm256_mul_ps(velec,rinv{I}{J}),fgb),rinv{I}{J}); - /* #define INNERFLOPS INNERFLOPS+3 */ - /* #endif */ - /* #elif KERNEL_ELEC=='Ewald' */ /* EWALD ELECTROSTATICS */ @@ -1005,17 +902,6 @@ void /* #endif */ velecsum = _mm256_add_ps(velecsum,velec); /* #define INNERFLOPS INNERFLOPS+1 */ - /* #if KERNEL_ELEC=='GeneralizedBorn' */ - /* #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */ - vgb = _mm256_and_ps(vgb,cutoff_mask); - /* #define INNERFLOPS INNERFLOPS+1 */ - /* #endif */ - /* #if ROUND == 'Epilogue' */ - vgb = _mm256_andnot_ps(dummy_mask,vgb); - /* #endif */ - vgbsum = _mm256_add_ps(vgbsum,vgb); - /* #define INNERFLOPS INNERFLOPS+1 */ - /* #endif */ /* #endif */ /* #if 'vdw' in INTERACTION_FLAGS[I][J] */ /* ## Note special check for TIP4P-TIP4P. 
Since we are cutting of all hydrogen interactions we also cut the LJ-only O-O interaction */ @@ -1185,19 +1071,11 @@ void gmx_mm256_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid); /* #define OUTERFLOPS OUTERFLOPS+1 */ /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC */ - gmx_mm256_update_1pot_ps(vgbsum,kernel_data->energygrp_polarization+ggid); - /* #define OUTERFLOPS OUTERFLOPS+1 */ - /* #endif */ /* #if KERNEL_VDW != 'None' */ gmx_mm256_update_1pot_ps(vvdwsum,kernel_data->energygrp_vdw+ggid); /* #define OUTERFLOPS OUTERFLOPS+1 */ /* #endif */ /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */ - dvdasum = _mm256_mul_ps(dvdasum, _mm256_mul_ps(isai{I},isai{I})); - gmx_mm256_update_1pot_ps(dvdasum,dvda+inr); - /* #endif */ /* Increment number of inner iterations */ inneriter += j_index_end - j_index_start; diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_c/make_nb_kernel_c.py b/src/gromacs/gmxlib/nonbonded/nb_kernel_c/make_nb_kernel_c.py index 53f5022bff..bba8574011 100755 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_c/make_nb_kernel_c.py +++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_c/make_nb_kernel_c.py @@ -2,7 +2,7 @@ # # This file is part of the GROMACS molecular simulation package. # -# Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by +# Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, # and including many others, as listed in the AUTHORS file in the # top-level source directory and at http://www.gromacs.org. 
@@ -105,7 +105,6 @@ ElectrostaticsList = { 'None' : [], 'Coulomb' : ['rinv','rinvsq'], 'ReactionField' : ['rinv','rinvsq'], - 'GeneralizedBorn' : ['rinv','r'], 'CubicSplineTable' : ['rinv','r','table'], 'Ewald' : ['rinv','rinvsq','r'], } @@ -190,7 +189,6 @@ Abbreviation = { 'Coulomb' : 'Coul', 'Ewald' : 'Ew', 'ReactionField' : 'RF', - 'GeneralizedBorn' : 'GB', 'CubicSplineTable' : 'CSTab', 'LennardJones' : 'LJ', 'Buckingham' : 'Bham', @@ -269,7 +267,7 @@ def KeepKernel(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,Kernel return 0 # No need for LJ-only water optimization, or water optimization with implicit solvent. - if('Water' in KernelGeom[0] and (KernelElec=='None' or 'GeneralizedBorn' in KernelElec)): + if('Water' in KernelGeom[0] and KernelElec=='None'): return 0 # Non-matching table settings are pointless diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwBham_GeomP1P1_c.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwBham_GeomP1P1_c.c deleted file mode 100644 index 343dd09dc4..0000000000 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwBham_GeomP1P1_c.c +++ /dev/null @@ -1,505 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 2012,2013,2014.2015,2017, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/* - * Note: this file was generated by the GROMACS c kernel generator. 
- */ -#include "gmxpre.h" - -#include "config.h" - -#include - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwBham_GeomP1P1_VF_c - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: Buckingham - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecGB_VdwBham_GeomP1P1_VF_c - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - int i_shift_offset,i_coord_offset,j_coord_offset; - int j_index_start,j_index_end; - int nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter; - real shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real *shiftvec,*fshift,*x,*f; - int vdwioffset0; - real ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0; - real jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - real dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00; - real velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int gbitab; - real vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp; - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - real rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - int vfitab; - real rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF; - real *vftab; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = fr->ic->epsfac; - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = 
fr->gbtab->scale; - gbtab = fr->gbtab->data; - gbinvepsdiff = (1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent); - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec[ggid] += velecsum; - kernel_data->energygrp_polarization[ggid] += vgbsum; - kernel_data->energygrp_vdw[ggid] += vvdwsum; - dvda[inr] = dvda[inr] + dvdasum*isai0*isai0; - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 16 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*16 + inneriter*97); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwBham_GeomP1P1_F_c - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: Buckingham - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecGB_VdwBham_GeomP1P1_F_c - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - int i_shift_offset,i_coord_offset,j_coord_offset; - int j_index_start,j_index_end; - int nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter; - real shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real *shiftvec,*fshift,*x,*f; - int vdwioffset0; - real ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0; - real jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - real dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00; - real velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int gbitab; - real vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp; - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - real 
rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - int vfitab; - real rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF; - real *vftab; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = fr->ic->epsfac; - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = fr->gbtab->scale; - gbtab = fr->gbtab->data; - gbinvepsdiff = (1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent); - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_c - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: CubicSplineTable - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_c - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - int i_shift_offset,i_coord_offset,j_coord_offset; - int j_index_start,j_index_end; - int nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter; - real shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real *shiftvec,*fshift,*x,*f; - int vdwioffset0; - real ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0; - real jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - real dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00; - real velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int gbitab; - real 
vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp; - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - real rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - int vfitab; - real rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF; - real *vftab; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = fr->ic->epsfac; - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = kernel_data->table_vdw->scale; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = fr->gbtab->scale; - gbtab = fr->gbtab->data; - gbinvepsdiff = (1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent); - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec[ggid] += velecsum; - kernel_data->energygrp_polarization[ggid] += vgbsum; - kernel_data->energygrp_vdw[ggid] += vvdwsum; - dvda[inr] = dvda[inr] + dvdasum*isai0*isai0; - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 16 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*16 + inneriter*91); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_c - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: CubicSplineTable - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_c - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - 
nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - int i_shift_offset,i_coord_offset,j_coord_offset; - int j_index_start,j_index_end; - int nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter; - real shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real *shiftvec,*fshift,*x,*f; - int vdwioffset0; - real ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0; - real jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - real dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00; - real velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int gbitab; - real vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp; - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - real rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - int vfitab; - real rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF; - real *vftab; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = fr->ic->epsfac; - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = kernel_data->table_vdw->scale; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = fr->gbtab->scale; - gbtab = fr->gbtab->data; - gbinvepsdiff = (1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent); - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_c - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: 
PotentialAndForce - */ -void -nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_c - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - int i_shift_offset,i_coord_offset,j_coord_offset; - int j_index_start,j_index_end; - int nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter; - real shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real *shiftvec,*fshift,*x,*f; - int vdwioffset0; - real ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0; - real jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - real dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00; - real velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int gbitab; - real vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp; - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - real rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - int vfitab; - real rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF; - real *vftab; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = fr->ic->epsfac; - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = fr->gbtab->scale; - gbtab = fr->gbtab->data; - gbinvepsdiff = (1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent); - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec[ggid] += velecsum; - kernel_data->energygrp_polarization[ggid] += vgbsum; - kernel_data->energygrp_vdw[ggid] += vvdwsum; 
- dvda[inr] = dvda[inr] + dvdasum*isai0*isai0; - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 16 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*16 + inneriter*71); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_c - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_c - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - int i_shift_offset,i_coord_offset,j_coord_offset; - int j_index_start,j_index_end; - int nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter; - real shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real *shiftvec,*fshift,*x,*f; - int vdwioffset0; - real ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0; - real jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - real dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00; - real velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int gbitab; - real vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp; - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - real rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - int vfitab; - real rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF; - real *vftab; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = 
fr->fshift[0]; - facel = fr->ic->epsfac; - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = fr->gbtab->scale; - gbtab = fr->gbtab->data; - gbinvepsdiff = (1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent); - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_c - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_c - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - int i_shift_offset,i_coord_offset,j_coord_offset; - int j_index_start,j_index_end; - int nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter; - real shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real *shiftvec,*fshift,*x,*f; - int vdwioffset0; - real ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0; - real jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - real dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00; - real velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int gbitab; - real vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp; - real *invsqrta,*dvda,*gbtab; - int vfitab; - real rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF; - real *vftab; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = 
fr->fshift[0]; - facel = fr->ic->epsfac; - charge = mdatoms->chargeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = fr->gbtab->scale; - gbtab = fr->gbtab->data; - gbinvepsdiff = (1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent); - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec[ggid] += velecsum; - kernel_data->energygrp_polarization[ggid] += vgbsum; - dvda[inr] = dvda[inr] + dvdasum*isai0*isai0; - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 15 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*15 + inneriter*58); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwNone_GeomP1P1_F_c - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecGB_VdwNone_GeomP1P1_F_c - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - int i_shift_offset,i_coord_offset,j_coord_offset; - int j_index_start,j_index_end; - int nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter; - real shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real *shiftvec,*fshift,*x,*f; - int vdwioffset0; - real ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0; - real jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - real dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00; - real velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int gbitab; - real vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp; - real *invsqrta,*dvda,*gbtab; - int 
vfitab; - real rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF; - real *vftab; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = fr->ic->epsfac; - charge = mdatoms->chargeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = fr->gbtab->scale; - gbtab = fr->gbtab->data; - gbinvepsdiff = (1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent); - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "gromacs/gmxlib/nrnb.h" -#include "gromacs/utility/real.h" -#include "gromacs/utility/smalloc.h" - -typedef struct -{ - real ** pvdwparam; - int * jindex; - int ** exclusion_mask; -} -gmx_allvsall_data_t; - -static int -calc_maxoffset(int i, int natoms) -{ - int maxoffset; - - if ((natoms % 2) == 1) - { - /* Odd number of atoms, easy */ - maxoffset = natoms/2; - } - else if ((natoms % 4) == 0) - { - /* Multiple of four is hard */ - if (i < natoms/2) - { - if ((i % 2) == 0) - { - maxoffset = natoms/2; - } - else - { - maxoffset = natoms/2-1; - } - } - else - { - if ((i % 2) == 1) - { - maxoffset = natoms/2; - } - else - { - maxoffset = natoms/2-1; - } - } - } - else - { - /* natoms/2 = odd */ - if ((i % 2) == 0) - { - maxoffset = natoms/2; - } - else - { - maxoffset = natoms/2-1; - } - } - - return maxoffset; -} - - -static void -setup_exclusions_and_indices(gmx_allvsall_data_t * aadata, - t_blocka * excl, - int natoms) -{ - int i, j, k; - int nj0, nj1; - int max_offset; - int max_excl_offset; - int iexcl; - - /* This routine can appear to be a bit complex, but it is mostly book-keeping. - * To enable the fast all-vs-all kernel we need to be able to stream through all coordinates - * whether they should interact or not. 
- * - * To avoid looping over the exclusions, we create a simple mask that is 1 if the interaction - * should be present, otherwise 0. Since exclusions typically only occur when i & j are close, - * we create a jindex array with three elements per i atom: the starting point, the point to - * which we need to check exclusions, and the end point. - * This way we only have to allocate a short exclusion mask per i atom. - */ - - /* Allocate memory for our modified jindex array */ - snew(aadata->jindex, 3*natoms); - - /* Pointer to lists with exclusion masks */ - snew(aadata->exclusion_mask, natoms); - - for (i = 0; i < natoms; i++) - { - /* Start */ - aadata->jindex[3*i] = i+1; - max_offset = calc_maxoffset(i, natoms); - - /* Exclusions */ - nj0 = excl->index[i]; - nj1 = excl->index[i+1]; - - /* first check the max range */ - max_excl_offset = -1; - - for (j = nj0; j < nj1; j++) - { - iexcl = excl->a[j]; - - k = iexcl - i; - - if (k+natoms <= max_offset) - { - k += natoms; - } - - max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset; - } - - max_excl_offset = (max_offset < max_excl_offset) ? max_offset : max_excl_offset; - - aadata->jindex[3*i+1] = i+1+max_excl_offset; - - snew(aadata->exclusion_mask[i], max_excl_offset); - /* Include everything by default */ - for (j = 0; j < max_excl_offset; j++) - { - /* Use all-ones to mark interactions that should be present, compatible with SSE */ - aadata->exclusion_mask[i][j] = 0xFFFFFFFF; - } - - /* Go through exclusions again */ - for (j = nj0; j < nj1; j++) - { - iexcl = excl->a[j]; - - k = iexcl - i; - - if (k+natoms <= max_offset) - { - k += natoms; - } - - if (k > 0 && k <= max_excl_offset) - { - /* Excluded, kill it! 
*/ - aadata->exclusion_mask[i][k-1] = 0; - } - } - - /* End */ - aadata->jindex[3*i+2] = i+1+max_offset; - } -} - - -static void -setup_aadata(gmx_allvsall_data_t ** p_aadata, - t_blocka * excl, - int natoms, - int * type, - int ntype, - real * pvdwparam) -{ - int i, j, idx; - gmx_allvsall_data_t *aadata; - real *p; - - snew(aadata, 1); - *p_aadata = aadata; - - /* Generate vdw params */ - snew(aadata->pvdwparam, ntype); - - for (i = 0; i < ntype; i++) - { - snew(aadata->pvdwparam[i], 2*natoms); - p = aadata->pvdwparam[i]; - - /* Lets keep it simple and use multiple steps - first create temp. c6/c12 arrays */ - for (j = 0; j < natoms; j++) - { - idx = i*ntype+type[j]; - p[2*j] = pvdwparam[2*idx]; - p[2*j+1] = pvdwparam[2*idx+1]; - } - } - - setup_exclusions_and_indices(aadata, excl, natoms); -} - - - -void -nb_kernel_allvsallgb(t_nblist gmx_unused * nlist, - rvec * xx, - rvec * ff, - struct t_forcerec * fr, - t_mdatoms * mdatoms, - nb_kernel_data_t * kernel_data, - t_nrnb * nrnb) -{ - gmx_allvsall_data_t *aadata; - int natoms; - int ni0, ni1; - int nj0, nj1, nj2; - int i, j, k; - real * charge; - int * type; - real facel; - real * pvdw; - int ggid; - int * mask; - real * GBtab; - real gbfactor; - real * invsqrta; - real * dvda; - real vgbtot, dvdasum; - int nnn, n0; - - real ix, iy, iz, iq; - real fix, fiy, fiz; - real jx, jy, jz, qq; - real dx, dy, dz; - real tx, ty, tz; - real rsq, rinv, rinvsq, rinvsix; - real vcoul, vctot; - real c6, c12, Vvdw6, Vvdw12, Vvdwtot; - real fscal, dvdatmp, fijC, vgb; - real Y, F, Fp, Geps, Heps2, VV, FF, eps, eps2, r, rt; - real dvdaj, gbscale, isaprod, isai, isaj, gbtabscale; - real * f; - real * x; - t_blocka * excl; - real * Vvdw; - real * Vc; - real * vpol; - - x = xx[0]; - f = ff[0]; - charge = mdatoms->chargeA; - type = mdatoms->typeA; - gbfactor = ((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - facel = fr->ic->epsfac; - GBtab = fr->gbtab->data; - gbtabscale = fr->gbtab->scale; - invsqrta = fr->invsqrta; - dvda = 
fr->dvda; - vpol = kernel_data->energygrp_polarization; - - natoms = mdatoms->nr; - ni0 = 0; - ni1 = mdatoms->homenr; - - aadata = reinterpret_cast(fr->AllvsAll_work); - excl = kernel_data->exclusions; - - Vc = kernel_data->energygrp_elec; - Vvdw = kernel_data->energygrp_vdw; - - if (aadata == NULL) - { - setup_aadata(&aadata, excl, natoms, type, fr->ntype, fr->nbfp); - fr->AllvsAll_work = aadata; - } - - for (i = ni0; i < ni1; i++) - { - /* We assume shifts are NOT used for all-vs-all interactions */ - - /* Load i atom data */ - ix = x[3*i]; - iy = x[3*i+1]; - iz = x[3*i+2]; - iq = facel*charge[i]; - - isai = invsqrta[i]; - - pvdw = aadata->pvdwparam[type[i]]; - - /* Zero the potential energy for this list */ - Vvdwtot = 0.0; - vctot = 0.0; - vgbtot = 0.0; - dvdasum = 0.0; - - /* Clear i atom forces */ - fix = 0.0; - fiy = 0.0; - fiz = 0.0; - - /* Load limits for loop over neighbors */ - nj0 = aadata->jindex[3*i]; - nj1 = aadata->jindex[3*i+1]; - nj2 = aadata->jindex[3*i+2]; - - mask = aadata->exclusion_mask[i]; - - /* Prologue part, including exclusion mask */ - for (j = nj0; j < nj1; j++, mask++) - { - if (*mask != 0) - { - k = j%natoms; - - /* load j atom coordinates */ - jx = x[3*k]; - jy = x[3*k+1]; - jz = x[3*k+2]; - - /* Calculate distance */ - dx = ix - jx; - dy = iy - jy; - dz = iz - jz; - rsq = dx*dx+dy*dy+dz*dz; - - /* Calculate 1/r and 1/r2 */ - rinv = 1.0/sqrt(rsq); - - /* Load parameters for j atom */ - isaj = invsqrta[k]; - isaprod = isai*isaj; - qq = iq*charge[k]; - vcoul = qq*rinv; - fscal = vcoul*rinv; - qq = isaprod*(-qq)*gbfactor; - gbscale = isaprod*gbtabscale; - c6 = pvdw[2*k]; - c12 = pvdw[2*k+1]; - rinvsq = rinv*rinv; - - /* Tabulated Generalized-Born interaction */ - dvdaj = dvda[k]; - r = rsq*rinv; - - /* Calculate table index */ - rt = r*gbscale; - n0 = rt; - eps = rt-n0; - eps2 = eps*eps; - nnn = 4*n0; - Y = GBtab[nnn]; - F = GBtab[nnn+1]; - Geps = eps*GBtab[nnn+2]; - Heps2 = eps2*GBtab[nnn+3]; - Fp = F+Geps+Heps2; - VV = Y+eps*Fp; - FF 
= Fp+Geps+2.0*Heps2; - vgb = qq*VV; - fijC = qq*FF*gbscale; - dvdatmp = -0.5*(vgb+fijC*r); - dvdasum = dvdasum + dvdatmp; - dvda[k] = dvdaj+dvdatmp*isaj*isaj; - vctot = vctot + vcoul; - vgbtot = vgbtot + vgb; - - /* Lennard-Jones interaction */ - rinvsix = rinvsq*rinvsq*rinvsq; - Vvdw6 = c6*rinvsix; - Vvdw12 = c12*rinvsix*rinvsix; - Vvdwtot = Vvdwtot+Vvdw12-Vvdw6; - fscal = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq-(fijC-fscal)*rinv; - - /* Calculate temporary vectorial force */ - tx = fscal*dx; - ty = fscal*dy; - tz = fscal*dz; - - /* Increment i atom force */ - fix = fix + tx; - fiy = fiy + ty; - fiz = fiz + tz; - - /* Decrement j atom force */ - f[3*k] = f[3*k] - tx; - f[3*k+1] = f[3*k+1] - ty; - f[3*k+2] = f[3*k+2] - tz; - } - /* Inner loop uses 38 flops/iteration */ - } - - /* Main part, no exclusions */ - for (j = nj1; j < nj2; j++) - { - k = j%natoms; - - /* load j atom coordinates */ - jx = x[3*k]; - jy = x[3*k+1]; - jz = x[3*k+2]; - - /* Calculate distance */ - dx = ix - jx; - dy = iy - jy; - dz = iz - jz; - rsq = dx*dx+dy*dy+dz*dz; - - /* Calculate 1/r and 1/r2 */ - rinv = 1.0/sqrt(rsq); - - /* Load parameters for j atom */ - isaj = invsqrta[k]; - isaprod = isai*isaj; - qq = iq*charge[k]; - vcoul = qq*rinv; - fscal = vcoul*rinv; - qq = isaprod*(-qq)*gbfactor; - gbscale = isaprod*gbtabscale; - c6 = pvdw[2*k]; - c12 = pvdw[2*k+1]; - rinvsq = rinv*rinv; - - /* Tabulated Generalized-Born interaction */ - dvdaj = dvda[k]; - r = rsq*rinv; - - /* Calculate table index */ - rt = r*gbscale; - n0 = rt; - eps = rt-n0; - eps2 = eps*eps; - nnn = 4*n0; - Y = GBtab[nnn]; - F = GBtab[nnn+1]; - Geps = eps*GBtab[nnn+2]; - Heps2 = eps2*GBtab[nnn+3]; - Fp = F+Geps+Heps2; - VV = Y+eps*Fp; - FF = Fp+Geps+2.0*Heps2; - vgb = qq*VV; - fijC = qq*FF*gbscale; - dvdatmp = -0.5*(vgb+fijC*r); - dvdasum = dvdasum + dvdatmp; - dvda[k] = dvdaj+dvdatmp*isaj*isaj; - vctot = vctot + vcoul; - vgbtot = vgbtot + vgb; - - /* Lennard-Jones interaction */ - rinvsix = rinvsq*rinvsq*rinvsq; - Vvdw6 = 
c6*rinvsix; - Vvdw12 = c12*rinvsix*rinvsix; - Vvdwtot = Vvdwtot+Vvdw12-Vvdw6; - fscal = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq-(fijC-fscal)*rinv; - - /* Calculate temporary vectorial force */ - tx = fscal*dx; - ty = fscal*dy; - tz = fscal*dz; - - /* Increment i atom force */ - fix = fix + tx; - fiy = fiy + ty; - fiz = fiz + tz; - - /* Decrement j atom force */ - f[3*k] = f[3*k] - tx; - f[3*k+1] = f[3*k+1] - ty; - f[3*k+2] = f[3*k+2] - tz; - - /* Inner loop uses 38 flops/iteration */ - } - - f[3*i] += fix; - f[3*i+1] += fiy; - f[3*i+2] += fiz; - - /* Add potential energies to the group for this list */ - ggid = 0; - - Vc[ggid] = Vc[ggid] + vctot; - Vvdw[ggid] = Vvdw[ggid] + Vvdwtot; - vpol[ggid] = vpol[ggid] + vgbtot; - dvda[i] = dvda[i] + dvdasum*isai*isai; - - /* Outer loop uses 6 flops/iteration */ - } - - /* 12 flops per outer iteration - * 19 flops per inner iteration - */ - inc_nrnb(nrnb, eNR_NBKERNEL_ELEC_VDW_VF, (ni1-ni0)*12 + ((ni1-ni0)*natoms/2)*19); -} diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsallgb.h b/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsallgb.h deleted file mode 100644 index 943af5c486..0000000000 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsallgb.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 2012,2014,2015,2017,2018, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. 
- * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. 
- */ -#ifndef _NB_KERNEL_ALLVSALLGB_H -#define _NB_KERNEL_ALLVSALLGB_H - -#include "config.h" - -#include "gromacs/gmxlib/nrnb.h" -#include "gromacs/gmxlib/nonbonded/nb_kernel.h" -#include "gromacs/math/vectypes.h" -#include "gromacs/mdtypes/mdatom.h" -#include "gromacs/mdtypes/nblist.h" - -void -nb_kernel_allvsallgb(t_nblist * nlist, - rvec * x, - rvec * f, - struct t_forcerec * fr, - t_mdatoms * mdatoms, - nb_kernel_data_t * kernel_data, - t_nrnb * nrnb); - -#endif diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_c.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_c.c index 11c173e2a9..250ee29c48 100644 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_c.c +++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_c.c @@ -57,6 +57,46 @@ nb_kernel_t nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_VF_c; nb_kernel_t nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_F_c; nb_kernel_t nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_VF_c; nb_kernel_t nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_F_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_c; +nb_kernel_t 
nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomP1P1_VF_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomP1P1_F_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW3P1_VF_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW3P1_F_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW3W3_VF_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW3W3_F_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW4P1_VF_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW4P1_F_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW4W4_VF_c; +nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW4W4_F_c; nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_c; nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_c; nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_c; @@ -177,94 +217,6 @@ nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_VF_c; nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_F_c; nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_VF_c; nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_F_c; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_c; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_c; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_c; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_c; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_c; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_c; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_c; -nb_kernel_t 
nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_c; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_c; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_c; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_c; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_c; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_c; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_c; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_c; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_c; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_c; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_c; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_c; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_c; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_c; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_c; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_c; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_c; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_c; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_c; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_c; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_c; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_c; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_c; -nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomP1P1_VF_c; -nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomP1P1_F_c; -nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW3P1_VF_c; -nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW3P1_F_c; -nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW3W3_VF_c; -nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW3W3_F_c; -nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW4P1_VF_c; -nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW4P1_F_c; -nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW4W4_VF_c; -nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW4W4_F_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_c; -nb_kernel_t 
nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomP1P1_VF_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomP1P1_F_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW3P1_VF_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW3P1_F_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW3W3_VF_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW3W3_F_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW4P1_VF_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW4P1_F_c; -nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW4W4_VF_c; -nb_kernel_t 
nb_kernel_ElecCSTab_VdwBham_GeomW4W4_F_c; -nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_c; -nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_c; -nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_c; -nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_F_c; -nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_c; -nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_c; -nb_kernel_t nb_kernel_ElecGB_VdwBham_GeomP1P1_VF_c; -nb_kernel_t nb_kernel_ElecGB_VdwBham_GeomP1P1_F_c; nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_c; nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_c; nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_c; @@ -365,6 +317,46 @@ nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomW4P1_VF_c; nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomW4P1_F_c; nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomW4W4_VF_c; nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomW4W4_F_c; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_c; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_c; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_c; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_c; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_c; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_c; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_c; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_c; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_c; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_c; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_c; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_c; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_c; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_c; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_c; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_c; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_c; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_c; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_c; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_c; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_c; +nb_kernel_t 
nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_c; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_c; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_c; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_c; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_c; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_c; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_c; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_c; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_c; +nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomP1P1_VF_c; +nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomP1P1_F_c; +nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW3P1_VF_c; +nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW3P1_F_c; +nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW3W3_VF_c; +nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW3W3_F_c; +nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW4P1_VF_c; +nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW4P1_F_c; +nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW4W4_VF_c; +nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW4W4_F_c; nb_kernel_info_t @@ -388,6 +380,46 @@ nb_kernel_info_t { nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_F_c, "nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_F_c", "c", "None", "None", "Buckingham", "PotentialShift", "ParticleParticle", "", "Force" }, { nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_VF_c, "nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_VF_c", "c", "None", "None", "Buckingham", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" }, { nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_F_c, "nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_F_c", "c", "None", "None", "Buckingham", "PotentialSwitch", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", 
"Force" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" }, + { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_c", "c", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_c", "c", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_c", "c", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", 
"PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_c", "c", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_c", "c", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_c", "c", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_c", "c", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_c", "c", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_c", "c", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_c", "c", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", 
"None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }, + { nb_kernel_ElecCSTab_VdwBham_GeomP1P1_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomP1P1_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwBham_GeomP1P1_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomP1P1_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwBham_GeomW3P1_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomW3P1_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwBham_GeomW3P1_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomW3P1_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwBham_GeomW3W3_VF_c, 
"nb_kernel_ElecCSTab_VdwBham_GeomW3W3_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwBham_GeomW3W3_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomW3W3_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCSTab_VdwBham_GeomW4P1_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomW4P1_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwBham_GeomW4P1_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomW4P1_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwBham_GeomW4W4_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomW4W4_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwBham_GeomW4W4_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomW4W4_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water4Water4", "", "Force" }, { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_c, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_c", "c", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "PotentialAndForce" }, { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_c, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_c", "c", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "Force" }, { nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_c, "nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_c", "c", "Ewald", "None", "LJEwald", "None", "Water3Particle", "", "PotentialAndForce" }, @@ -508,94 +540,6 @@ nb_kernel_info_t { nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_F_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_F_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "Water4Particle", "", "Force" }, { nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_VF_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_VF_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" }, { 
nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_F_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_F_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_c", "c", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_c", "c", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_c", "c", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_c", "c", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_c", "c", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_c", "c", 
"Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_c", "c", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_c", "c", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_c, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_c", "c", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_c", "c", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_c, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_c", "c", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_c", "c", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_c, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_c", "c", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_c", "c", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_c, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_c", "c", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_c, 
"nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCoul_VdwBham_GeomP1P1_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomP1P1_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwBham_GeomP1P1_F_c, "nb_kernel_ElecCoul_VdwBham_GeomP1P1_F_c", "c", "Coulomb", "None", "Buckingham", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCoul_VdwBham_GeomW3P1_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomW3P1_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwBham_GeomW3P1_F_c, 
"nb_kernel_ElecCoul_VdwBham_GeomW3P1_F_c", "c", "Coulomb", "None", "Buckingham", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwBham_GeomW3W3_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomW3W3_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwBham_GeomW3W3_F_c, "nb_kernel_ElecCoul_VdwBham_GeomW3W3_F_c", "c", "Coulomb", "None", "Buckingham", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCoul_VdwBham_GeomW4P1_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomW4P1_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwBham_GeomW4P1_F_c, "nb_kernel_ElecCoul_VdwBham_GeomW4P1_F_c", "c", "Coulomb", "None", "Buckingham", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwBham_GeomW4W4_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomW4W4_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwBham_GeomW4W4_F_c, "nb_kernel_ElecCoul_VdwBham_GeomW4W4_F_c", "c", "Coulomb", "None", "Buckingham", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_c", "c", 
"CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_c", "c", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_c", "c", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_c", "c", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_c", "c", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_c", "c", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_c", "c", 
"CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_c", "c", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_c", "c", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_c", "c", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_c", "c", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_c, 
"nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCSTab_VdwBham_GeomP1P1_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomP1P1_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwBham_GeomP1P1_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomP1P1_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwBham_GeomW3P1_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomW3P1_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwBham_GeomW3P1_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomW3P1_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwBham_GeomW3W3_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomW3W3_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwBham_GeomW3W3_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomW3W3_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCSTab_VdwBham_GeomW4P1_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomW4P1_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water4Particle", "", 
"PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwBham_GeomW4P1_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomW4P1_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwBham_GeomW4W4_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomW4W4_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwBham_GeomW4W4_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomW4W4_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_c, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_c", "c", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_c, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_c", "c", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_c", "c", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecGB_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecGB_VdwNone_GeomP1P1_F_c", "c", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_c, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_c", "c", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_c, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_c", "c", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecGB_VdwBham_GeomP1P1_VF_c, "nb_kernel_ElecGB_VdwBham_GeomP1P1_VF_c", "c", "GeneralizedBorn", "None", "Buckingham", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecGB_VdwBham_GeomP1P1_F_c, "nb_kernel_ElecGB_VdwBham_GeomP1P1_F_c", "c", "GeneralizedBorn", "None", "Buckingham", "None", "ParticleParticle", "", 
"Force" }, { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" }, { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" }, { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" }, @@ -695,7 +639,47 @@ nb_kernel_info_t { nb_kernel_ElecRF_VdwBham_GeomW4P1_VF_c, "nb_kernel_ElecRF_VdwBham_GeomW4P1_VF_c", "c", "ReactionField", "None", "Buckingham", "None", "Water4Particle", "", "PotentialAndForce" }, { nb_kernel_ElecRF_VdwBham_GeomW4P1_F_c, "nb_kernel_ElecRF_VdwBham_GeomW4P1_F_c", "c", "ReactionField", "None", "Buckingham", "None", "Water4Particle", "", "Force" }, { nb_kernel_ElecRF_VdwBham_GeomW4W4_VF_c, "nb_kernel_ElecRF_VdwBham_GeomW4W4_VF_c", "c", "ReactionField", "None", "Buckingham", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecRF_VdwBham_GeomW4W4_F_c, "nb_kernel_ElecRF_VdwBham_GeomW4W4_F_c", "c", "ReactionField", "None", "Buckingham", "None", "Water4Water4", "", "Force" } + { nb_kernel_ElecRF_VdwBham_GeomW4W4_F_c, "nb_kernel_ElecRF_VdwBham_GeomW4W4_F_c", "c", "ReactionField", "None", "Buckingham", "None", "Water4Water4", "", "Force" }, + { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_c", "c", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_c", "c", "Coulomb", "None", "LennardJones", "None", 
"Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_c", "c", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_c", "c", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_c", "c", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_c", "c", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" }, + { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_c", "c", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_c", "c", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_c", "c", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_c, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_c", "c", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_c", 
"c", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_c, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_c", "c", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_c", "c", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_c, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_c", "c", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_c", "c", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_c, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_c", "c", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" 
}, + { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }, + { nb_kernel_ElecCoul_VdwBham_GeomP1P1_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomP1P1_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwBham_GeomP1P1_F_c, "nb_kernel_ElecCoul_VdwBham_GeomP1P1_F_c", "c", "Coulomb", "None", "Buckingham", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCoul_VdwBham_GeomW3P1_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomW3P1_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwBham_GeomW3P1_F_c, "nb_kernel_ElecCoul_VdwBham_GeomW3P1_F_c", "c", "Coulomb", "None", "Buckingham", "None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwBham_GeomW3W3_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomW3W3_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwBham_GeomW3W3_F_c, "nb_kernel_ElecCoul_VdwBham_GeomW3W3_F_c", "c", "Coulomb", "None", "Buckingham", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCoul_VdwBham_GeomW4P1_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomW4P1_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwBham_GeomW4P1_F_c, 
"nb_kernel_ElecCoul_VdwBham_GeomW4P1_F_c", "c", "Coulomb", "None", "Buckingham", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwBham_GeomW4W4_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomW4W4_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwBham_GeomW4W4_F_c, "nb_kernel_ElecCoul_VdwBham_GeomW4W4_F_c", "c", "Coulomb", "None", "Buckingham", "None", "Water4Water4", "", "Force" } }; int diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_template_c.pre b/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_template_c.pre index 20cf4af2ef..27daeab020 100644 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_template_c.pre +++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_template_c.pre @@ -2,7 +2,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -143,18 +143,13 @@ void real velec,felec,velecsum,facel,crf,krf,krf2; real *charge; /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC */ - int gbitab; - real vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp; - real *invsqrta,*dvda,*gbtab; - /* #endif */ /* #if KERNEL_VDW != 'None' */ int nvdwtype; real rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6; int *vdwtype; real *vdwparam; /* #endif */ - /* #if 'Table' in KERNEL_ELEC or 'GeneralizedBorn' in KERNEL_ELEC or 'Table' in KERNEL_VDW */ + /* #if 'Table' in KERNEL_ELEC or 'Table' in KERNEL_VDW */ int vfitab; real rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF; real *vftab; @@ -232,14 +227,6 @@ void /* #endif */ /* #endif */ - /* #if KERNEL_ELEC=='GeneralizedBorn' */ - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = fr->gbtab->scale; - gbtab = fr->gbtab->data; - gbinvepsdiff = (1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent); - /* #endif */ - /* #if 'Water' in GEOMETRY_I */ /* Setup water-specific parameters */ inr = nlist->iinr[0]; @@ -358,9 +345,6 @@ void /* #for I in PARTICLES_ELEC_I */ iq{I} = facel*charge[inr+{I}]; /* #define OUTERFLOPS OUTERFLOPS+1 */ - /* #if KERNEL_ELEC=='GeneralizedBorn' */ - isai{I} = invsqrta[inr+{I}]; - /* #endif */ /* #endfor */ /* #for I in PARTICLES_VDW_I */ vdwioffset{I} = {NVDWPARAM}*nvdwtype*vdwtype[inr+{I}]; @@ -372,16 +356,10 @@ void /* #if KERNEL_ELEC != 'None' */ velecsum = 0.0; /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC */ - vgbsum = 0.0; - /* #endif */ /* #if KERNEL_VDW != 'None' */ vvdwsum = 0.0; /* #endif */ /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */ - dvdasum = 0.0; - /* #endif */ /* Start inner kernel loop */ for(jidx=j_index_start; jidxenergygrp_elec[ggid] += velecsum; /* #define OUTERFLOPS OUTERFLOPS+1 */ /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC */ - kernel_data->energygrp_polarization[ggid] += vgbsum; - 
/* #define OUTERFLOPS OUTERFLOPS+1 */ - /* #endif */ /* #if KERNEL_VDW != 'None' */ kernel_data->energygrp_vdw[ggid] += vvdwsum; /* #define OUTERFLOPS OUTERFLOPS+1 */ /* #endif */ /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */ - dvda[inr] = dvda[inr] + dvdasum*isai{I}*isai{I}; - /* #endif */ /* Increment number of inner iterations */ inneriter += j_index_end - j_index_start; diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/make_nb_kernel_sparc64_hpc_ace_double.py b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/make_nb_kernel_sparc64_hpc_ace_double.py index 6352ce3c0d..582044c2b7 100755 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/make_nb_kernel_sparc64_hpc_ace_double.py +++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/make_nb_kernel_sparc64_hpc_ace_double.py @@ -2,7 +2,7 @@ # # This file is part of the GROMACS molecular simulation package. # -# Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by +# Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, # and including many others, as listed in the AUTHORS file in the # top-level source directory and at http://www.gromacs.org. @@ -105,7 +105,6 @@ ElectrostaticsList = { 'None' : [], 'Coulomb' : ['rinv','rinvsq'], 'ReactionField' : ['rinv','rinvsq'], - 'GeneralizedBorn' : ['rinv','r'], 'CubicSplineTable' : ['rinv','r','table'], 'Ewald' : ['rinv','rinvsq','r'], } @@ -190,7 +189,6 @@ Abbreviation = { 'Coulomb' : 'Coul', 'Ewald' : 'Ew', 'ReactionField' : 'RF', - 'GeneralizedBorn' : 'GB', 'CubicSplineTable' : 'CSTab', 'LennardJones' : 'LJ', 'Buckingham' : 'Bham', @@ -269,7 +267,7 @@ def KeepKernel(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,Kernel return 0 # No need for LJ-only water optimization, or water optimization with implicit solvent. 
- if('Water' in KernelGeom[0] and (KernelElec=='None' or 'GeneralizedBorn' in KernelElec)): + if('Water' in KernelGeom[0] and KernelElec=='None'): return 0 # Non-matching table settings are pointless diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.c deleted file mode 100644 index 4b3e5b68f0..0000000000 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.c +++ /dev/null @@ -1,820 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. 
We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/* - * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator. - */ -#include "gmxpre.h" - -#include "config.h" - -#include - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: CubicSplineTable - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp; - _fjsp_v2r8 minushalf = gmx_fjsp_set1_v2r8(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = gmx_fjsp_set1_v2r8(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = 
gmx_fjsp_set1_v2r8((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vgbsum,kernel_data->energygrp_polarization+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - dvdasum = _fjsp_mul_v2r8(dvdasum, _fjsp_mul_v2r8(isai0,isai0)); - gmx_fjsp_update_1pot_v2r8(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 10 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*95); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: CubicSplineTable - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp; - _fjsp_v2r8 minushalf = gmx_fjsp_set1_v2r8(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = gmx_fjsp_set1_v2r8(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = 
gmx_fjsp_set1_v2r8((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp; - _fjsp_v2r8 minushalf = gmx_fjsp_set1_v2r8(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = gmx_fjsp_set1_v2r8(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = gmx_fjsp_set1_v2r8((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - 
jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vgbsum,kernel_data->energygrp_polarization+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - dvdasum = _fjsp_mul_v2r8(dvdasum, _fjsp_mul_v2r8(isai0,isai0)); - gmx_fjsp_update_1pot_v2r8(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 10 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*74); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp; - _fjsp_v2r8 minushalf = gmx_fjsp_set1_v2r8(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = gmx_fjsp_set1_v2r8(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = gmx_fjsp_set1_v2r8((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - 
jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp; - _fjsp_v2r8 minushalf = gmx_fjsp_set1_v2r8(-0.5); - real *invsqrta,*dvda,*gbtab; - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = gmx_fjsp_set1_v2r8(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = gmx_fjsp_set1_v2r8((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vgbsum,kernel_data->energygrp_polarization+ggid); - dvdasum = _fjsp_mul_v2r8(dvdasum, 
_fjsp_mul_v2r8(isai0,isai0)); - gmx_fjsp_update_1pot_v2r8(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 9 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*9 + inneriter*61); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp; - _fjsp_v2r8 minushalf = gmx_fjsp_set1_v2r8(-0.5); - real *invsqrta,*dvda,*gbtab; - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = gmx_fjsp_set1_v2r8(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = gmx_fjsp_set1_v2r8((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxinvsqrta; - dvda = fr->dvda; - gbtabscale = gmx_fjsp_set1_v2r8(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = 
gmx_fjsp_set1_v2r8((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - /* #endif */ - /* #if 'Water' in GEOMETRY_I */ /* Setup water-specific parameters */ inr = nlist->iinr[0]; @@ -358,9 +345,6 @@ void /* #for I in PARTICLES_ELEC_I */ iq{I} = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+{I})); /* #define OUTERFLOPS OUTERFLOPS+1 */ - /* #if KERNEL_ELEC=='GeneralizedBorn' */ - isai{I} = gmx_fjsp_load1_v2r8(invsqrta+inr+{I}); - /* #endif */ /* #endfor */ /* #for I in PARTICLES_VDW_I */ vdwioffset{I} = 2*nvdwtype*vdwtype[inr+{I}]; @@ -372,16 +356,10 @@ void /* #if KERNEL_ELEC != 'None' */ velecsum = _fjsp_setzero_v2r8(); /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC */ - vgbsum = _fjsp_setzero_v2r8(); - /* #endif */ /* #if KERNEL_VDW != 'None' */ vvdwsum = _fjsp_setzero_v2r8(); /* #endif */ /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */ - dvdasum = _fjsp_setzero_v2r8(); - /* #endif */ /* #for ROUND in ['Loop','Epilogue'] */ @@ -488,13 +466,6 @@ void /* #else */ jq{J} = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+{J}); /* #endif */ - /* #if KERNEL_ELEC=='GeneralizedBorn' */ - /* #if ROUND =='Loop' */ - isaj{J} = gmx_fjsp_load_2real_swizzle_v2r8(invsqrta+jnrA+{J},invsqrta+jnrB+{J}); - /* #else */ - isaj{J} = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),invsqrta+jnrA+{J}); - /* #endif */ - /* #endif */ /* #endfor */ /* #for J in PARTICLES_VDW_J */ vdwjidx{J}A = 2*vdwtype[jnrA+{J}]; @@ -600,61 +571,6 @@ void /* #define INNERFLOPS INNERFLOPS+3 */ /* #endif */ - /* #elif KERNEL_ELEC=='GeneralizedBorn' */ - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _fjsp_mul_v2r8(isai{I},isaj{J}); - gbqqfactor = _fjsp_neg_v2r8(_fjsp_mul_v2r8(qq{I}{J},_fjsp_mul_v2r8(isaprod,gbinvepsdiff))); - gbscale = _fjsp_mul_v2r8(isaprod,gbtabscale); - /* #define INNERFLOPS INNERFLOPS+5 */ - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by 
multiplying r with scale and truncating to integer. - */ - rt = _fjsp_mul_v2r8(r{I}{J},gbscale); - itab_tmp = _fjsp_dtox_v2r8(rt); - gbeps = _fjsp_sub_v2r8(rt,_fjsp_xtod_v2r8(itab_tmp)); - _fjsp_store_v2r8(&gbconv.simd,itab_tmp); - - Y = _fjsp_load_v2r8( gbtab + 4*gbconv.i[0] ); - /* #if ROUND == 'Loop' */ - F = _fjsp_load_v2r8( gbtab + 4*gbconv.i[1] ); - /* #else */ - F = _fjsp_setzero_v2r8(); - /* #endif */ - GMX_FJSP_TRANSPOSE2_V2R8(Y,F); - G = _fjsp_load_v2r8( gbtab + 4*gbconv.i[0] +2); - /* #if ROUND == 'Loop' */ - H = _fjsp_load_v2r8( gbtab + 4*gbconv.i[1] +2); - /* #else */ - H = _fjsp_setzero_v2r8(); - /* #endif */ - GMX_FJSP_TRANSPOSE2_V2R8(G,H); - Fp = _fjsp_madd_v2r8(gbeps,_fjsp_madd_v2r8(gbeps,H,G),F); - VV = _fjsp_madd_v2r8(gbeps,Fp,Y); - vgb = _fjsp_mul_v2r8(gbqqfactor,VV); - /* #define INNERFLOPS INNERFLOPS+10 */ - - /* #if 'Force' in KERNEL_VF */ - twogbeps = _fjsp_add_v2r8(gbeps,gbeps); - FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twogbeps,H,G),gbeps,Fp); - fgb = _fjsp_mul_v2r8(gbqqfactor,_fjsp_mul_v2r8(FF,gbscale)); - dvdatmp = _fjsp_mul_v2r8(minushalf,_fjsp_madd_v2r8(fgb,r{I}{J},vgb)); - dvdasum = _fjsp_add_v2r8(dvdasum,dvdatmp); - /* #if ROUND == 'Loop' */ - gmx_fjsp_increment_2real_swizzle_v2r8(dvda+jnrA,dvda+jnrB,_fjsp_mul_v2r8(dvdatmp,_fjsp_mul_v2r8(isaj{J},isaj{J}))); - /* #else */ - gmx_fjsp_increment_1real_v2r8(dvda+jnrA,_fjsp_mul_v2r8(dvdatmp,_fjsp_mul_v2r8(isaj{J},isaj{J}))); - /* #endif */ - /* #define INNERFLOPS INNERFLOPS+13 */ - /* #endif */ - velec = _fjsp_mul_v2r8(qq{I}{J},rinv{I}{J}); - /* #define INNERFLOPS INNERFLOPS+1 */ - /* #if 'Force' in KERNEL_VF */ - felec = _fjsp_mul_v2r8(_fjsp_msub_v2r8(velec,rinv{I}{J},fgb),rinv{I}{J}); - /* #define INNERFLOPS INNERFLOPS+3 */ - /* #endif */ - /* #elif KERNEL_ELEC=='Ewald' */ /* EWALD ELECTROSTATICS */ @@ -937,17 +853,6 @@ void /* #endif */ velecsum = _fjsp_add_v2r8(velecsum,velec); /* #define INNERFLOPS INNERFLOPS+1 */ - /* #if KERNEL_ELEC=='GeneralizedBorn' */ - /* #if 'exactcutoff' in 
INTERACTION_FLAGS[I][J] */ - vgb = _fjsp_and_v2r8(vgb,cutoff_mask); - /* #define INNERFLOPS INNERFLOPS+1 */ - /* #endif */ - /* #if ROUND == 'Epilogue' */ - vgb = _fjsp_unpacklo_v2r8(vgb,_fjsp_setzero_v2r8()); - /* #endif */ - vgbsum = _fjsp_add_v2r8(vgbsum,vgb); - /* #define INNERFLOPS INNERFLOPS+1 */ - /* #endif */ /* #endif */ /* #if 'vdw' in INTERACTION_FLAGS[I][J] */ /* #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */ @@ -1086,19 +991,11 @@ void gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid); /* #define OUTERFLOPS OUTERFLOPS+1 */ /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC */ - gmx_fjsp_update_1pot_v2r8(vgbsum,kernel_data->energygrp_polarization+ggid); - /* #define OUTERFLOPS OUTERFLOPS+1 */ - /* #endif */ /* #if KERNEL_VDW != 'None' */ gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); /* #define OUTERFLOPS OUTERFLOPS+1 */ /* #endif */ /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */ - dvdasum = _fjsp_mul_v2r8(dvdasum, _fjsp_mul_v2r8(isai{I},isai{I})); - gmx_fjsp_update_1pot_v2r8(dvdasum,dvda+inr); - /* #endif */ /* Increment number of inner iterations */ inneriter += j_index_end - j_index_start; diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/make_nb_kernel_sse2_double.py b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/make_nb_kernel_sse2_double.py index 0fb6d3c14c..ab3cce67ce 100755 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/make_nb_kernel_sse2_double.py +++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/make_nb_kernel_sse2_double.py @@ -2,7 +2,7 @@ # # This file is part of the GROMACS molecular simulation package. 
# -# Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by +# Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, # and including many others, as listed in the AUTHORS file in the # top-level source directory and at http://www.gromacs.org. @@ -105,7 +105,6 @@ ElectrostaticsList = { 'None' : [], 'Coulomb' : ['rinv','rinvsq'], 'ReactionField' : ['rinv','rinvsq'], - 'GeneralizedBorn' : ['rinv','r'], 'CubicSplineTable' : ['rinv','r','table'], 'Ewald' : ['rinv','rinvsq','r'], } @@ -190,7 +189,6 @@ Abbreviation = { 'Coulomb' : 'Coul', 'Ewald' : 'Ew', 'ReactionField' : 'RF', - 'GeneralizedBorn' : 'GB', 'CubicSplineTable' : 'CSTab', 'LennardJones' : 'LJ', 'Buckingham' : 'Bham', @@ -269,7 +267,7 @@ def KeepKernel(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,Kernel return 0 # No need for LJ-only water optimization, or water optimization with implicit solvent. - if('Water' in KernelGeom[0] and (KernelElec=='None' or 'GeneralizedBorn' in KernelElec)): + if('Water' in KernelGeom[0] and KernelElec=='None'): return 0 # Non-matching table settings are pointless diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse2_double.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse2_double.c deleted file mode 100644 index 9f96a56a04..0000000000 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse2_double.c +++ /dev/null @@ -1,838 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. 
- * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/* - * Note: this file was generated by the GROMACS sse2_double kernel generator. 
- */ -#include "gmxpre.h" - -#include "config.h" - -#include - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_x86_sse2_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse2_double - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: CubicSplineTable - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse2_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - __m128d tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128d ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - __m128d jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128d dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128d velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128d vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp; - __m128d minushalf = _mm_set1_pd(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - __m128d rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - __m128d one_sixth = _mm_set1_pd(1.0/6.0); - __m128d one_twelfth = _mm_set1_pd(1.0/12.0); - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128d rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m128d dummy_mask,cutoff_mask; - __m128d signbit = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) ); - __m128d one = _mm_set1_pd(1.0); - __m128d two = _mm_set1_pd(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_pd(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = _mm_set1_pd(kernel_data->table_vdw->scale); - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_pd(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_pd((1.0/fr->ic->epsilon_r) - 
(1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_mm_update_1pot_pd(vgbsum,kernel_data->energygrp_polarization+ggid); - gmx_mm_update_1pot_pd(vvdwsum,kernel_data->energygrp_vdw+ggid); - dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai0,isai0)); - gmx_mm_update_1pot_pd(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 10 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*92); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse2_double - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: CubicSplineTable - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse2_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - __m128d tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128d ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - __m128d jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128d dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128d velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128d vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp; - __m128d minushalf = _mm_set1_pd(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - __m128d rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - __m128d one_sixth = _mm_set1_pd(1.0/6.0); - __m128d one_twelfth = _mm_set1_pd(1.0/12.0); - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128d rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m128d dummy_mask,cutoff_mask; - __m128d signbit = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) ); - __m128d one = _mm_set1_pd(1.0); - __m128d two = _mm_set1_pd(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_pd(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = _mm_set1_pd(kernel_data->table_vdw->scale); - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_pd(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_pd((1.0/fr->ic->epsilon_r) - 
(1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_x86_sse2_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse2_double - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse2_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - __m128d tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128d ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - __m128d jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128d dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128d velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128d vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp; - __m128d minushalf = _mm_set1_pd(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - __m128d rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - __m128d one_sixth = _mm_set1_pd(1.0/6.0); - __m128d one_twelfth = _mm_set1_pd(1.0/12.0); - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128d rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m128d dummy_mask,cutoff_mask; - __m128d signbit = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) ); - __m128d one = _mm_set1_pd(1.0); - __m128d two = _mm_set1_pd(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_pd(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_pd(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA 
= 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_mm_update_1pot_pd(vgbsum,kernel_data->energygrp_polarization+ggid); - gmx_mm_update_1pot_pd(vvdwsum,kernel_data->energygrp_vdw+ggid); - dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai0,isai0)); - gmx_mm_update_1pot_pd(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 10 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*71); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse2_double - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse2_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - __m128d tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128d ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - __m128d jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128d dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128d velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128d vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp; - __m128d minushalf = _mm_set1_pd(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - __m128d rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - __m128d one_sixth = _mm_set1_pd(1.0/6.0); - __m128d one_twelfth = _mm_set1_pd(1.0/12.0); - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128d rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m128d dummy_mask,cutoff_mask; - __m128d signbit = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) ); - __m128d one = _mm_set1_pd(1.0); - __m128d two = _mm_set1_pd(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_pd(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_pd(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA 
= 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_x86_sse2_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse2_double - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse2_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - __m128d tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128d ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - __m128d jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128d dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128d velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128d vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp; - __m128d minushalf = _mm_set1_pd(-0.5); - real *invsqrta,*dvda,*gbtab; - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128d rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m128d dummy_mask,cutoff_mask; - __m128d signbit = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) ); - __m128d one = _mm_set1_pd(1.0); - __m128d two = _mm_set1_pd(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_pd(fr->ic->epsfac); - charge = mdatoms->chargeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_pd(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_mm_update_1pot_pd(vgbsum,kernel_data->energygrp_polarization+ggid); - dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai0,isai0)); - 
gmx_mm_update_1pot_pd(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 9 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*9 + inneriter*58); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse2_double - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse2_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - __m128d tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128d ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - __m128d jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128d dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128d velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128d vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp; - __m128d minushalf = _mm_set1_pd(-0.5); - real *invsqrta,*dvda,*gbtab; - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128d rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m128d dummy_mask,cutoff_mask; - __m128d signbit = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) ); - __m128d one = _mm_set1_pd(1.0); - __m128d two = _mm_set1_pd(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_pd(fr->ic->epsfac); - charge = mdatoms->chargeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_pd(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxinvsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_pd(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_pd((1.0/fr->ic->epsilon_r) - 
(1.0/fr->gb_epsilon_solvent)); - /* #endif */ - /* #if 'Water' in GEOMETRY_I */ /* Setup water-specific parameters */ inr = nlist->iinr[0]; @@ -360,9 +346,6 @@ void /* #for I in PARTICLES_ELEC_I */ iq{I} = _mm_mul_pd(facel,_mm_load1_pd(charge+inr+{I})); /* #define OUTERFLOPS OUTERFLOPS+1 */ - /* #if KERNEL_ELEC=='GeneralizedBorn' */ - isai{I} = _mm_load1_pd(invsqrta+inr+{I}); - /* #endif */ /* #endfor */ /* #for I in PARTICLES_VDW_I */ vdwioffset{I} = 2*nvdwtype*vdwtype[inr+{I}]; @@ -374,16 +357,10 @@ void /* #if KERNEL_ELEC != 'None' */ velecsum = _mm_setzero_pd(); /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC */ - vgbsum = _mm_setzero_pd(); - /* #endif */ /* #if KERNEL_VDW != 'None' */ vvdwsum = _mm_setzero_pd(); /* #endif */ /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */ - dvdasum = _mm_setzero_pd(); - /* #endif */ /* #for ROUND in ['Loop','Epilogue'] */ @@ -490,13 +467,6 @@ void /* #else */ jq{J} = _mm_load_sd(charge+jnrA+{J}); /* #endif */ - /* #if KERNEL_ELEC=='GeneralizedBorn' */ - /* #if ROUND =='Loop' */ - isaj{J} = gmx_mm_load_2real_swizzle_pd(invsqrta+jnrA+{J},invsqrta+jnrB+{J}); - /* #else */ - isaj{J} = _mm_load_sd(invsqrta+jnrA+{J}); - /* #endif */ - /* #endif */ /* #endfor */ /* #for J in PARTICLES_VDW_J */ vdwjidx{J}A = 2*vdwtype[jnrA+{J}]; @@ -604,64 +574,6 @@ void /* #define INNERFLOPS INNERFLOPS+3 */ /* #endif */ - /* #elif KERNEL_ELEC=='GeneralizedBorn' */ - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_pd(isai{I},isaj{J}); - gbqqfactor = _mm_xor_pd(signbit,_mm_mul_pd(qq{I}{J},_mm_mul_pd(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_pd(isaprod,gbtabscale); - /* #define INNERFLOPS INNERFLOPS+5 */ - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. 
- */ - rt = _mm_mul_pd(r{I}{J},gbscale); - gbitab = _mm_cvttpd_epi32(rt); - gbeps = _mm_sub_pd(rt,_mm_cvtepi32_pd(gbitab)); - gbitab = _mm_slli_epi32(gbitab,2); - - Y = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) ); - /* #if ROUND == 'Loop' */ - F = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,1) ); - /* #else */ - F = _mm_setzero_pd(); - /* #endif */ - GMX_MM_TRANSPOSE2_PD(Y,F); - G = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) +2); - /* #if ROUND == 'Loop' */ - H = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,1) +2); - /* #else */ - H = _mm_setzero_pd(); - /* #endif */ - GMX_MM_TRANSPOSE2_PD(G,H); - Heps = _mm_mul_pd(gbeps,H); - Fp = _mm_add_pd(F,_mm_mul_pd(gbeps,_mm_add_pd(G,Heps))); - VV = _mm_add_pd(Y,_mm_mul_pd(gbeps,Fp)); - vgb = _mm_mul_pd(gbqqfactor,VV); - /* #define INNERFLOPS INNERFLOPS+10 */ - - /* #if 'Force' in KERNEL_VF */ - FF = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps)))); - fgb = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale)); - dvdatmp = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r{I}{J}))); - /* #if ROUND == 'Epilogue' */ - dvdatmp = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd()); - /* #endif */ - dvdasum = _mm_add_pd(dvdasum,dvdatmp); - /* #if ROUND == 'Loop' */ - gmx_mm_increment_2real_swizzle_pd(dvda+jnrA,dvda+jnrB,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj{J},isaj{J}))); - /* #else */ - gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj{J},isaj{J}))); - /* #endif */ - /* #define INNERFLOPS INNERFLOPS+13 */ - /* #endif */ - velec = _mm_mul_pd(qq{I}{J},rinv{I}{J}); - /* #define INNERFLOPS INNERFLOPS+1 */ - /* #if 'Force' in KERNEL_VF */ - felec = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(velec,rinv{I}{J}),fgb),rinv{I}{J}); - /* #define INNERFLOPS INNERFLOPS+3 */ - /* #endif */ - /* #elif KERNEL_ELEC=='Ewald' */ /* EWALD ELECTROSTATICS */ @@ -946,17 +858,6 @@ void /* #endif */ velecsum = _mm_add_pd(velecsum,velec); /* #define INNERFLOPS INNERFLOPS+1 */ - /* #if KERNEL_ELEC=='GeneralizedBorn' 
*/ - /* #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */ - vgb = _mm_and_pd(vgb,cutoff_mask); - /* #define INNERFLOPS INNERFLOPS+1 */ - /* #endif */ - /* #if ROUND == 'Epilogue' */ - vgb = _mm_unpacklo_pd(vgb,_mm_setzero_pd()); - /* #endif */ - vgbsum = _mm_add_pd(vgbsum,vgb); - /* #define INNERFLOPS INNERFLOPS+1 */ - /* #endif */ /* #endif */ /* #if 'vdw' in INTERACTION_FLAGS[I][J] */ /* #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */ @@ -1097,19 +998,11 @@ void gmx_mm_update_1pot_pd(velecsum,kernel_data->energygrp_elec+ggid); /* #define OUTERFLOPS OUTERFLOPS+1 */ /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC */ - gmx_mm_update_1pot_pd(vgbsum,kernel_data->energygrp_polarization+ggid); - /* #define OUTERFLOPS OUTERFLOPS+1 */ - /* #endif */ /* #if KERNEL_VDW != 'None' */ gmx_mm_update_1pot_pd(vvdwsum,kernel_data->energygrp_vdw+ggid); /* #define OUTERFLOPS OUTERFLOPS+1 */ /* #endif */ /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */ - dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai{I},isai{I})); - gmx_mm_update_1pot_pd(dvdasum,dvda+inr); - /* #endif */ /* Increment number of inner iterations */ inneriter += j_index_end - j_index_start; diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/make_nb_kernel_sse2_single.py b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/make_nb_kernel_sse2_single.py index 1fa8fee809..02701e1ac3 100755 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/make_nb_kernel_sse2_single.py +++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/make_nb_kernel_sse2_single.py @@ -2,7 +2,7 @@ # # This file is part of the GROMACS molecular simulation package. 
# -# Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by +# Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, # and including many others, as listed in the AUTHORS file in the # top-level source directory and at http://www.gromacs.org. @@ -105,7 +105,6 @@ ElectrostaticsList = { 'None' : [], 'Coulomb' : ['rinv','rinvsq'], 'ReactionField' : ['rinv','rinvsq'], - 'GeneralizedBorn' : ['rinv','r'], 'CubicSplineTable' : ['rinv','r','table'], 'Ewald' : ['rinv','rinvsq','r'], } @@ -190,7 +189,6 @@ Abbreviation = { 'Coulomb' : 'Coul', 'Ewald' : 'Ew', 'ReactionField' : 'RF', - 'GeneralizedBorn' : 'GB', 'CubicSplineTable' : 'CSTab', 'LennardJones' : 'LJ', 'Buckingham' : 'Bham', @@ -269,7 +267,7 @@ def KeepKernel(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,Kernel return 0 # No need for LJ-only water optimization, or water optimization with implicit solvent. - if('Water' in KernelGeom[0] and (KernelElec=='None' or 'GeneralizedBorn' in KernelElec)): + if('Water' in KernelGeom[0] and KernelElec=='None'): return 0 # Non-matching table settings are pointless diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse2_single.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse2_single.c deleted file mode 100644 index 2dd1b08824..0000000000 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse2_single.c +++ /dev/null @@ -1,958 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. 
- * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/* - * Note: this file was generated by the GROMACS sse2_single kernel generator. 
- */ -#include "gmxpre.h" - -#include "config.h" - -#include - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_x86_sse2_single.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse2_single - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: CubicSplineTable - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse2_single - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B,C,D refer to j loop unrolling done with SSE, e.g. for the four different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB,jnrC,jnrD; - int jnrlistA,jnrlistB,jnrlistC,jnrlistD; - int j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - real *fjptrA,*fjptrB,*fjptrC,*fjptrD; - real scratch[4*DIM]; - __m128 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D; - __m128 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp; - __m128 minushalf = _mm_set1_ps(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - __m128 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - __m128 one_sixth = _mm_set1_ps(1.0/6.0); - __m128 one_twelfth = _mm_set1_ps(1.0/12.0); - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m128 dummy_mask,cutoff_mask; - __m128 signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) ); - __m128 one = _mm_set1_ps(1.0); - __m128 two = _mm_set1_ps(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_ps(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = _mm_set1_ps(kernel_data->table_vdw->scale); - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = 
_mm_set1_ps(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = jnrC = jnrD = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - j_coord_offsetC = 0; - j_coord_offsetD = 0; - - outeriter = 0; - inneriter = 0; - - for(iidx=0;iidx<4*DIM;iidx++) - { - scratch[iidx] = 0.0; - } - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx=0; jidx+=4) - { - - /* Get j neighbor index, and coordinate index */ - jnrA = jjnr[jidx]; - jnrB = jjnr[jidx+1]; - jnrC = jjnr[jidx+2]; - jnrD = jjnr[jidx+3]; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = sse2_invsqrt_f(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A, - vdwparam+vdwioffset0+vdwjidx0B, - vdwparam+vdwioffset0+vdwjidx0C, - vdwparam+vdwioffset0+vdwjidx0D, - &c6_00,&c12_00); - - /* Calculate 
table index by multiplying r with table scale and truncate to integer */ - rt = _mm_mul_ps(r00,vftabscale); - vfitab = _mm_cvttps_epi32(rt); - vfeps = _mm_sub_ps(rt,_mm_cvtepi32_ps(vfitab)); - vfitab = _mm_slli_epi32(vfitab,3); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); - gbeps = _mm_sub_ps(rt,_mm_cvtepi32_ps(gbitab)); - gbitab = _mm_slli_epi32(gbitab,2); - - Y = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(gbeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp)); - vgb = _mm_mul_ps(gbqqfactor,VV); - - FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00))); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00); - - /* CUBIC SPLINE TABLE DISPERSION */ - Y = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,0) ); - F = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,1) ); - G = _mm_load_ps( vftab + 
gmx_mm_extract_epi32(vfitab,2) ); - H = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(vfeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(vfeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(vfeps,Fp)); - vvdw6 = _mm_mul_ps(c6_00,VV); - FF = _mm_add_ps(Fp,_mm_mul_ps(vfeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fvdw6 = _mm_mul_ps(c6_00,FF); - - /* CUBIC SPLINE TABLE REPULSION */ - vfitab = _mm_add_epi32(vfitab,ifour); - Y = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,0) ); - F = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,1) ); - G = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,2) ); - H = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(vfeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(vfeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(vfeps,Fp)); - vvdw12 = _mm_mul_ps(c12_00,VV); - FF = _mm_add_ps(Fp,_mm_mul_ps(vfeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fvdw12 = _mm_mul_ps(c12_00,FF); - vvdw = _mm_add_ps(vvdw12,vvdw6); - fvdw = _mm_xor_ps(signbit,_mm_mul_ps(_mm_add_ps(fvdw6,fvdw12),_mm_mul_ps(vftabscale,rinv00))); - - /* Update potential sum for this i atom from the interaction with this j atom. */ - velecsum = _mm_add_ps(velecsum,velec); - vgbsum = _mm_add_ps(vgbsum,vgb); - vvdwsum = _mm_add_ps(vvdwsum,vvdw); - - fscal = _mm_add_ps(felec,fvdw); - - /* Calculate temporary vectorial force */ - tx = _mm_mul_ps(fscal,dx00); - ty = _mm_mul_ps(fscal,dy00); - tz = _mm_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm_add_ps(fix0,tx); - fiy0 = _mm_add_ps(fiy0,ty); - fiz0 = _mm_add_ps(fiz0,tz); - - fjptrA = f+j_coord_offsetA; - fjptrB = f+j_coord_offsetB; - fjptrC = f+j_coord_offsetC; - fjptrD = f+j_coord_offsetD; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 92 flops */ - } - - if(jidx=0) ? jnrlistA : 0; - jnrB = (jnrlistB>=0) ? jnrlistB : 0; - jnrC = (jnrlistC>=0) ? 
jnrlistC : 0; - jnrD = (jnrlistD>=0) ? jnrlistD : 0; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = sse2_invsqrt_f(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - r00 = _mm_andnot_ps(dummy_mask,r00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A, - vdwparam+vdwioffset0+vdwjidx0B, - vdwparam+vdwioffset0+vdwjidx0C, - vdwparam+vdwioffset0+vdwjidx0D, - &c6_00,&c12_00); - - /* Calculate table index by multiplying r with table scale and truncate to integer */ - rt = _mm_mul_ps(r00,vftabscale); - vfitab = _mm_cvttps_epi32(rt); - vfeps = _mm_sub_ps(rt,_mm_cvtepi32_ps(vfitab)); - vfitab = _mm_slli_epi32(vfitab,3); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - 
* but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); - gbeps = _mm_sub_ps(rt,_mm_cvtepi32_ps(gbitab)); - gbitab = _mm_slli_epi32(gbitab,2); - - Y = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(gbeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp)); - vgb = _mm_mul_ps(gbqqfactor,VV); - - FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00))); - dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? 
dvda+jnrD : scratch; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00); - - /* CUBIC SPLINE TABLE DISPERSION */ - Y = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,0) ); - F = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,1) ); - G = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,2) ); - H = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(vfeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(vfeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(vfeps,Fp)); - vvdw6 = _mm_mul_ps(c6_00,VV); - FF = _mm_add_ps(Fp,_mm_mul_ps(vfeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fvdw6 = _mm_mul_ps(c6_00,FF); - - /* CUBIC SPLINE TABLE REPULSION */ - vfitab = _mm_add_epi32(vfitab,ifour); - Y = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,0) ); - F = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,1) ); - G = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,2) ); - H = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(vfeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(vfeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(vfeps,Fp)); - vvdw12 = _mm_mul_ps(c12_00,VV); - FF = _mm_add_ps(Fp,_mm_mul_ps(vfeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fvdw12 = _mm_mul_ps(c12_00,FF); - vvdw = _mm_add_ps(vvdw12,vvdw6); - fvdw = _mm_xor_ps(signbit,_mm_mul_ps(_mm_add_ps(fvdw6,fvdw12),_mm_mul_ps(vftabscale,rinv00))); - - /* Update potential sum for this i atom from the interaction with this j atom. 
*/ - velec = _mm_andnot_ps(dummy_mask,velec); - velecsum = _mm_add_ps(velecsum,velec); - vgb = _mm_andnot_ps(dummy_mask,vgb); - vgbsum = _mm_add_ps(vgbsum,vgb); - vvdw = _mm_andnot_ps(dummy_mask,vvdw); - vvdwsum = _mm_add_ps(vvdwsum,vvdw); - - fscal = _mm_add_ps(felec,fvdw); - - fscal = _mm_andnot_ps(dummy_mask,fscal); - - /* Calculate temporary vectorial force */ - tx = _mm_mul_ps(fscal,dx00); - ty = _mm_mul_ps(fscal,dy00); - tz = _mm_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm_add_ps(fix0,tx); - fiy0 = _mm_add_ps(fiy0,ty); - fiz0 = _mm_add_ps(fiz0,tz); - - fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch; - fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch; - fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch; - fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 93 flops */ - } - - /* End of innermost loop */ - - gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0, - f+i_coord_offset,fshift+i_shift_offset); - - ggid = gid[iidx]; - /* Update potential energies */ - gmx_mm_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid); - gmx_mm_update_1pot_ps(vgbsum,kernel_data->energygrp_polarization+ggid); - gmx_mm_update_1pot_ps(vvdwsum,kernel_data->energygrp_vdw+ggid); - dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0)); - gmx_mm_update_1pot_ps(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 10 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*93); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse2_single - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: CubicSplineTable - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void 
-nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse2_single - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B,C,D refer to j loop unrolling done with SSE, e.g. for the four different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB,jnrC,jnrD; - int jnrlistA,jnrlistB,jnrlistC,jnrlistD; - int j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - real *fjptrA,*fjptrB,*fjptrC,*fjptrD; - real scratch[4*DIM]; - __m128 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D; - __m128 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp; - __m128 minushalf = _mm_set1_ps(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - __m128 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - __m128 one_sixth = _mm_set1_ps(1.0/6.0); - __m128 one_twelfth = _mm_set1_ps(1.0/12.0); - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m128 dummy_mask,cutoff_mask; - __m128 signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) ); - __m128 
one = _mm_set1_ps(1.0); - __m128 two = _mm_set1_ps(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_ps(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = _mm_set1_ps(kernel_data->table_vdw->scale); - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_ps(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = jnrC = jnrD = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - j_coord_offsetC = 0; - j_coord_offsetD = 0; - - outeriter = 0; - inneriter = 0; - - for(iidx=0;iidx<4*DIM;iidx++) - { - scratch[iidx] = 0.0; - } - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx=0; jidx+=4) - { - - /* Get j neighbor index, and coordinate index */ - jnrA = jjnr[jidx]; - jnrB = jjnr[jidx+1]; - jnrC = jjnr[jidx+2]; - jnrD = jjnr[jidx+3]; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = sse2_invsqrt_f(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - 
invsqrta+jnrC+0,invsqrta+jnrD+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A, - vdwparam+vdwioffset0+vdwjidx0B, - vdwparam+vdwioffset0+vdwjidx0C, - vdwparam+vdwioffset0+vdwjidx0D, - &c6_00,&c12_00); - - /* Calculate table index by multiplying r with table scale and truncate to integer */ - rt = _mm_mul_ps(r00,vftabscale); - vfitab = _mm_cvttps_epi32(rt); - vfeps = _mm_sub_ps(rt,_mm_cvtepi32_ps(vfitab)); - vfitab = _mm_slli_epi32(vfitab,3); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. 
- */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); - gbeps = _mm_sub_ps(rt,_mm_cvtepi32_ps(gbitab)); - gbitab = _mm_slli_epi32(gbitab,2); - - Y = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(gbeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp)); - vgb = _mm_mul_ps(gbqqfactor,VV); - - FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00))); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00); - - /* CUBIC SPLINE TABLE DISPERSION */ - Y = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,0) ); - F = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,1) ); - G = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,2) ); - H = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(vfeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(vfeps,_mm_add_ps(G,Heps))); - FF = _mm_add_ps(Fp,_mm_mul_ps(vfeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fvdw6 = _mm_mul_ps(c6_00,FF); - - /* CUBIC SPLINE TABLE REPULSION */ - vfitab = _mm_add_epi32(vfitab,ifour); - Y = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,0) ); - F = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,1) ); - G = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,2) ); - H = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,3) ); - 
_MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(vfeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(vfeps,_mm_add_ps(G,Heps))); - FF = _mm_add_ps(Fp,_mm_mul_ps(vfeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fvdw12 = _mm_mul_ps(c12_00,FF); - fvdw = _mm_xor_ps(signbit,_mm_mul_ps(_mm_add_ps(fvdw6,fvdw12),_mm_mul_ps(vftabscale,rinv00))); - - fscal = _mm_add_ps(felec,fvdw); - - /* Calculate temporary vectorial force */ - tx = _mm_mul_ps(fscal,dx00); - ty = _mm_mul_ps(fscal,dy00); - tz = _mm_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm_add_ps(fix0,tx); - fiy0 = _mm_add_ps(fiy0,ty); - fiz0 = _mm_add_ps(fiz0,tz); - - fjptrA = f+j_coord_offsetA; - fjptrB = f+j_coord_offsetB; - fjptrC = f+j_coord_offsetC; - fjptrD = f+j_coord_offsetD; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 82 flops */ - } - - if(jidx=0) ? jnrlistA : 0; - jnrB = (jnrlistB>=0) ? jnrlistB : 0; - jnrC = (jnrlistC>=0) ? jnrlistC : 0; - jnrD = (jnrlistD>=0) ? jnrlistD : 0; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = sse2_invsqrt_f(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - - /************************** - * CALCULATE 
INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - r00 = _mm_andnot_ps(dummy_mask,r00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A, - vdwparam+vdwioffset0+vdwjidx0B, - vdwparam+vdwioffset0+vdwjidx0C, - vdwparam+vdwioffset0+vdwjidx0D, - &c6_00,&c12_00); - - /* Calculate table index by multiplying r with table scale and truncate to integer */ - rt = _mm_mul_ps(r00,vftabscale); - vfitab = _mm_cvttps_epi32(rt); - vfeps = _mm_sub_ps(rt,_mm_cvtepi32_ps(vfitab)); - vfitab = _mm_slli_epi32(vfitab,3); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. 
- */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); - gbeps = _mm_sub_ps(rt,_mm_cvtepi32_ps(gbitab)); - gbitab = _mm_slli_epi32(gbitab,2); - - Y = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(gbeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp)); - vgb = _mm_mul_ps(gbqqfactor,VV); - - FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00))); - dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? 
dvda+jnrD : scratch; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00); - - /* CUBIC SPLINE TABLE DISPERSION */ - Y = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,0) ); - F = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,1) ); - G = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,2) ); - H = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(vfeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(vfeps,_mm_add_ps(G,Heps))); - FF = _mm_add_ps(Fp,_mm_mul_ps(vfeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fvdw6 = _mm_mul_ps(c6_00,FF); - - /* CUBIC SPLINE TABLE REPULSION */ - vfitab = _mm_add_epi32(vfitab,ifour); - Y = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,0) ); - F = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,1) ); - G = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,2) ); - H = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(vfeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(vfeps,_mm_add_ps(G,Heps))); - FF = _mm_add_ps(Fp,_mm_mul_ps(vfeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fvdw12 = _mm_mul_ps(c12_00,FF); - fvdw = _mm_xor_ps(signbit,_mm_mul_ps(_mm_add_ps(fvdw6,fvdw12),_mm_mul_ps(vftabscale,rinv00))); - - fscal = _mm_add_ps(felec,fvdw); - - fscal = _mm_andnot_ps(dummy_mask,fscal); - - /* Calculate temporary vectorial force */ - tx = _mm_mul_ps(fscal,dx00); - ty = _mm_mul_ps(fscal,dy00); - tz = _mm_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm_add_ps(fix0,tx); - fiy0 = _mm_add_ps(fiy0,ty); - fiz0 = _mm_add_ps(fiz0,tz); - - fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch; - fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch; - fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch; - fjptrD = (jnrlistD>=0) ? 
f+j_coord_offsetD : scratch; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 83 flops */ - } - - /* End of innermost loop */ - - gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0, - f+i_coord_offset,fshift+i_shift_offset); - - dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0)); - gmx_mm_update_1pot_ps(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 7 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*83); -} diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sse2_single.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sse2_single.c deleted file mode 100644 index 137dd21fa8..0000000000 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sse2_single.c +++ /dev/null @@ -1,856 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/* - * Note: this file was generated by the GROMACS sse2_single kernel generator. - */ -#include "gmxpre.h" - -#include "config.h" - -#include - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_x86_sse2_single.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse2_single - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse2_single - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B,C,D refer to j loop unrolling done with SSE, e.g. for the four different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB,jnrC,jnrD; - int jnrlistA,jnrlistB,jnrlistC,jnrlistD; - int j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - real *fjptrA,*fjptrB,*fjptrC,*fjptrD; - real scratch[4*DIM]; - __m128 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D; - __m128 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp; - __m128 minushalf = _mm_set1_ps(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - __m128 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - __m128 one_sixth = _mm_set1_ps(1.0/6.0); - __m128 one_twelfth = _mm_set1_ps(1.0/12.0); - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m128 dummy_mask,cutoff_mask; - __m128 signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) ); - __m128 one = _mm_set1_ps(1.0); - __m128 two = _mm_set1_ps(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_ps(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_ps(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_ps((1.0/fr->ic->epsilon_r) 
- (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = jnrC = jnrD = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - j_coord_offsetC = 0; - j_coord_offsetD = 0; - - outeriter = 0; - inneriter = 0; - - for(iidx=0;iidx<4*DIM;iidx++) - { - scratch[iidx] = 0.0; - } - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx=0; jidx+=4) - { - - /* Get j neighbor index, and coordinate index */ - jnrA = jjnr[jidx]; - jnrB = jjnr[jidx+1]; - jnrC = jjnr[jidx+2]; - jnrD = jjnr[jidx+3]; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = sse2_invsqrt_f(rsq00); - - rinvsq00 = _mm_mul_ps(rinv00,rinv00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A, - vdwparam+vdwioffset0+vdwjidx0B, - vdwparam+vdwioffset0+vdwjidx0C, - vdwparam+vdwioffset0+vdwjidx0D, - &c6_00,&c12_00); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = 
_mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); - gbeps = _mm_sub_ps(rt,_mm_cvtepi32_ps(gbitab)); - gbitab = _mm_slli_epi32(gbitab,2); - - Y = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(gbeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp)); - vgb = _mm_mul_ps(gbqqfactor,VV); - - FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00))); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00); - - /* LENNARD-JONES DISPERSION/REPULSION */ - - rinvsix = _mm_mul_ps(_mm_mul_ps(rinvsq00,rinvsq00),rinvsq00); - vvdw6 = _mm_mul_ps(c6_00,rinvsix); - vvdw12 = _mm_mul_ps(c12_00,_mm_mul_ps(rinvsix,rinvsix)); - vvdw = _mm_sub_ps( _mm_mul_ps(vvdw12,one_twelfth) , _mm_mul_ps(vvdw6,one_sixth) ); - fvdw = _mm_mul_ps(_mm_sub_ps(vvdw12,vvdw6),rinvsq00); - - /* Update potential sum for this i atom from the interaction with this j atom. 
*/ - velecsum = _mm_add_ps(velecsum,velec); - vgbsum = _mm_add_ps(vgbsum,vgb); - vvdwsum = _mm_add_ps(vvdwsum,vvdw); - - fscal = _mm_add_ps(felec,fvdw); - - /* Calculate temporary vectorial force */ - tx = _mm_mul_ps(fscal,dx00); - ty = _mm_mul_ps(fscal,dy00); - tz = _mm_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm_add_ps(fix0,tx); - fiy0 = _mm_add_ps(fiy0,ty); - fiz0 = _mm_add_ps(fiz0,tz); - - fjptrA = f+j_coord_offsetA; - fjptrB = f+j_coord_offsetB; - fjptrC = f+j_coord_offsetC; - fjptrD = f+j_coord_offsetD; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 71 flops */ - } - - if(jidx=0) ? jnrlistA : 0; - jnrB = (jnrlistB>=0) ? jnrlistB : 0; - jnrC = (jnrlistC>=0) ? jnrlistC : 0; - jnrD = (jnrlistD>=0) ? jnrlistD : 0; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = sse2_invsqrt_f(rsq00); - - rinvsq00 = _mm_mul_ps(rinv00,rinv00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - r00 = _mm_andnot_ps(dummy_mask,r00); - - /* Compute parameters for interactions between 
i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A, - vdwparam+vdwioffset0+vdwjidx0B, - vdwparam+vdwioffset0+vdwjidx0C, - vdwparam+vdwioffset0+vdwjidx0D, - &c6_00,&c12_00); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); - gbeps = _mm_sub_ps(rt,_mm_cvtepi32_ps(gbitab)); - gbitab = _mm_slli_epi32(gbitab,2); - - Y = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(gbeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp)); - vgb = _mm_mul_ps(gbqqfactor,VV); - - FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00))); - dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? 
dvda+jnrD : scratch; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00); - - /* LENNARD-JONES DISPERSION/REPULSION */ - - rinvsix = _mm_mul_ps(_mm_mul_ps(rinvsq00,rinvsq00),rinvsq00); - vvdw6 = _mm_mul_ps(c6_00,rinvsix); - vvdw12 = _mm_mul_ps(c12_00,_mm_mul_ps(rinvsix,rinvsix)); - vvdw = _mm_sub_ps( _mm_mul_ps(vvdw12,one_twelfth) , _mm_mul_ps(vvdw6,one_sixth) ); - fvdw = _mm_mul_ps(_mm_sub_ps(vvdw12,vvdw6),rinvsq00); - - /* Update potential sum for this i atom from the interaction with this j atom. */ - velec = _mm_andnot_ps(dummy_mask,velec); - velecsum = _mm_add_ps(velecsum,velec); - vgb = _mm_andnot_ps(dummy_mask,vgb); - vgbsum = _mm_add_ps(vgbsum,vgb); - vvdw = _mm_andnot_ps(dummy_mask,vvdw); - vvdwsum = _mm_add_ps(vvdwsum,vvdw); - - fscal = _mm_add_ps(felec,fvdw); - - fscal = _mm_andnot_ps(dummy_mask,fscal); - - /* Calculate temporary vectorial force */ - tx = _mm_mul_ps(fscal,dx00); - ty = _mm_mul_ps(fscal,dy00); - tz = _mm_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm_add_ps(fix0,tx); - fiy0 = _mm_add_ps(fiy0,ty); - fiz0 = _mm_add_ps(fiz0,tz); - - fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch; - fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch; - fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch; - fjptrD = (jnrlistD>=0) ? 
f+j_coord_offsetD : scratch; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 72 flops */ - } - - /* End of innermost loop */ - - gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0, - f+i_coord_offset,fshift+i_shift_offset); - - ggid = gid[iidx]; - /* Update potential energies */ - gmx_mm_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid); - gmx_mm_update_1pot_ps(vgbsum,kernel_data->energygrp_polarization+ggid); - gmx_mm_update_1pot_ps(vvdwsum,kernel_data->energygrp_vdw+ggid); - dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0)); - gmx_mm_update_1pot_ps(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 10 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*72); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse2_single - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse2_single - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B,C,D refer to j loop unrolling done with SSE, e.g. for the four different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB,jnrC,jnrD; - int jnrlistA,jnrlistB,jnrlistC,jnrlistD; - int j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - real *fjptrA,*fjptrB,*fjptrC,*fjptrD; - real scratch[4*DIM]; - __m128 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D; - __m128 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp; - __m128 minushalf = _mm_set1_ps(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - __m128 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - __m128 one_sixth = _mm_set1_ps(1.0/6.0); - __m128 one_twelfth = _mm_set1_ps(1.0/12.0); - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m128 dummy_mask,cutoff_mask; - __m128 signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) ); - __m128 one = _mm_set1_ps(1.0); - __m128 two = _mm_set1_ps(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_ps(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_ps(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_ps((1.0/fr->ic->epsilon_r) 
- (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = jnrC = jnrD = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - j_coord_offsetC = 0; - j_coord_offsetD = 0; - - outeriter = 0; - inneriter = 0; - - for(iidx=0;iidx<4*DIM;iidx++) - { - scratch[iidx] = 0.0; - } - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx=0; jidx+=4) - { - - /* Get j neighbor index, and coordinate index */ - jnrA = jjnr[jidx]; - jnrB = jjnr[jidx+1]; - jnrC = jjnr[jidx+2]; - jnrD = jjnr[jidx+3]; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = sse2_invsqrt_f(rsq00); - - rinvsq00 = _mm_mul_ps(rinv00,rinv00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A, - vdwparam+vdwioffset0+vdwjidx0B, - vdwparam+vdwioffset0+vdwjidx0C, - vdwparam+vdwioffset0+vdwjidx0D, - &c6_00,&c12_00); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = 
_mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); - gbeps = _mm_sub_ps(rt,_mm_cvtepi32_ps(gbitab)); - gbitab = _mm_slli_epi32(gbitab,2); - - Y = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(gbeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp)); - vgb = _mm_mul_ps(gbqqfactor,VV); - - FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00))); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00); - - /* LENNARD-JONES DISPERSION/REPULSION */ - - rinvsix = _mm_mul_ps(_mm_mul_ps(rinvsq00,rinvsq00),rinvsq00); - fvdw = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(c12_00,rinvsix),c6_00),_mm_mul_ps(rinvsix,rinvsq00)); - - fscal = _mm_add_ps(felec,fvdw); - - /* Calculate temporary vectorial force */ - tx = _mm_mul_ps(fscal,dx00); - ty = _mm_mul_ps(fscal,dy00); - tz = _mm_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm_add_ps(fix0,tx); - fiy0 = _mm_add_ps(fiy0,ty); - fiz0 = _mm_add_ps(fiz0,tz); - - 
fjptrA = f+j_coord_offsetA; - fjptrB = f+j_coord_offsetB; - fjptrC = f+j_coord_offsetC; - fjptrD = f+j_coord_offsetD; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 64 flops */ - } - - if(jidx=0) ? jnrlistA : 0; - jnrB = (jnrlistB>=0) ? jnrlistB : 0; - jnrC = (jnrlistC>=0) ? jnrlistC : 0; - jnrD = (jnrlistD>=0) ? jnrlistD : 0; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = sse2_invsqrt_f(rsq00); - - rinvsq00 = _mm_mul_ps(rinv00,rinv00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - r00 = _mm_andnot_ps(dummy_mask,r00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A, - vdwparam+vdwioffset0+vdwjidx0B, - vdwparam+vdwioffset0+vdwjidx0C, - vdwparam+vdwioffset0+vdwjidx0D, - &c6_00,&c12_00); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale 
= _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); - gbeps = _mm_sub_ps(rt,_mm_cvtepi32_ps(gbitab)); - gbitab = _mm_slli_epi32(gbitab,2); - - Y = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(gbeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp)); - vgb = _mm_mul_ps(gbqqfactor,VV); - - FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00))); - dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? 
dvda+jnrD : scratch; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00); - - /* LENNARD-JONES DISPERSION/REPULSION */ - - rinvsix = _mm_mul_ps(_mm_mul_ps(rinvsq00,rinvsq00),rinvsq00); - fvdw = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(c12_00,rinvsix),c6_00),_mm_mul_ps(rinvsix,rinvsq00)); - - fscal = _mm_add_ps(felec,fvdw); - - fscal = _mm_andnot_ps(dummy_mask,fscal); - - /* Calculate temporary vectorial force */ - tx = _mm_mul_ps(fscal,dx00); - ty = _mm_mul_ps(fscal,dy00); - tz = _mm_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm_add_ps(fix0,tx); - fiy0 = _mm_add_ps(fiy0,ty); - fiz0 = _mm_add_ps(fiz0,tz); - - fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch; - fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch; - fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch; - fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 65 flops */ - } - - /* End of innermost loop */ - - gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0, - f+i_coord_offset,fshift+i_shift_offset); - - dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0)); - gmx_mm_update_1pot_ps(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 7 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*65); -} diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_sse2_single.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_sse2_single.c deleted file mode 100644 index 36268cd47e..0000000000 --- 
a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_sse2_single.c +++ /dev/null @@ -1,761 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/* - * Note: this file was generated by the GROMACS sse2_single kernel generator. 
- */ -#include "gmxpre.h" - -#include "config.h" - -#include - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_x86_sse2_single.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse2_single - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse2_single - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B,C,D refer to j loop unrolling done with SSE, e.g. for the four different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB,jnrC,jnrD; - int jnrlistA,jnrlistB,jnrlistC,jnrlistD; - int j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - real *fjptrA,*fjptrB,*fjptrC,*fjptrD; - real scratch[4*DIM]; - __m128 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D; - __m128 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp; - __m128 minushalf = _mm_set1_ps(-0.5); - real *invsqrta,*dvda,*gbtab; - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m128 dummy_mask,cutoff_mask; - __m128 signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) ); - __m128 one = _mm_set1_ps(1.0); - __m128 two = _mm_set1_ps(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_ps(fr->ic->epsfac); - charge = mdatoms->chargeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_ps(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = jnrC = jnrD = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - j_coord_offsetC = 0; - j_coord_offsetD = 0; - - outeriter = 0; - inneriter = 0; - - for(iidx=0;iidx<4*DIM;iidx++) - { - scratch[iidx] = 0.0; 
- } - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx=0; jidx+=4) - { - - /* Get j neighbor index, and coordinate index */ - jnrA = jjnr[jidx]; - jnrB = jjnr[jidx+1]; - jnrC = jjnr[jidx+2]; - jnrD = jjnr[jidx+3]; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = sse2_invsqrt_f(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. 
- */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); - gbeps = _mm_sub_ps(rt,_mm_cvtepi32_ps(gbitab)); - gbitab = _mm_slli_epi32(gbitab,2); - - Y = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(gbeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp)); - vgb = _mm_mul_ps(gbqqfactor,VV); - - FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00))); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00); - - /* Update potential sum for this i atom from the interaction with this j atom. */ - velecsum = _mm_add_ps(velecsum,velec); - vgbsum = _mm_add_ps(vgbsum,vgb); - - fscal = felec; - - /* Calculate temporary vectorial force */ - tx = _mm_mul_ps(fscal,dx00); - ty = _mm_mul_ps(fscal,dy00); - tz = _mm_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm_add_ps(fix0,tx); - fiy0 = _mm_add_ps(fiy0,ty); - fiz0 = _mm_add_ps(fiz0,tz); - - fjptrA = f+j_coord_offsetA; - fjptrB = f+j_coord_offsetB; - fjptrC = f+j_coord_offsetC; - fjptrD = f+j_coord_offsetD; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 58 flops */ - } - - if(jidx=0) ? jnrlistA : 0; - jnrB = (jnrlistB>=0) ? jnrlistB : 0; - jnrC = (jnrlistC>=0) ? jnrlistC : 0; - jnrD = (jnrlistD>=0) ? 
jnrlistD : 0; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = sse2_invsqrt_f(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - r00 = _mm_andnot_ps(dummy_mask,r00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. 
- */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); - gbeps = _mm_sub_ps(rt,_mm_cvtepi32_ps(gbitab)); - gbitab = _mm_slli_epi32(gbitab,2); - - Y = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(gbeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp)); - vgb = _mm_mul_ps(gbqqfactor,VV); - - FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00))); - dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? dvda+jnrD : scratch; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00); - - /* Update potential sum for this i atom from the interaction with this j atom. 
*/ - velec = _mm_andnot_ps(dummy_mask,velec); - velecsum = _mm_add_ps(velecsum,velec); - vgb = _mm_andnot_ps(dummy_mask,vgb); - vgbsum = _mm_add_ps(vgbsum,vgb); - - fscal = felec; - - fscal = _mm_andnot_ps(dummy_mask,fscal); - - /* Calculate temporary vectorial force */ - tx = _mm_mul_ps(fscal,dx00); - ty = _mm_mul_ps(fscal,dy00); - tz = _mm_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm_add_ps(fix0,tx); - fiy0 = _mm_add_ps(fiy0,ty); - fiz0 = _mm_add_ps(fiz0,tz); - - fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch; - fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch; - fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch; - fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 59 flops */ - } - - /* End of innermost loop */ - - gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0, - f+i_coord_offset,fshift+i_shift_offset); - - ggid = gid[iidx]; - /* Update potential energies */ - gmx_mm_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid); - gmx_mm_update_1pot_ps(vgbsum,kernel_data->energygrp_polarization+ggid); - dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0)); - gmx_mm_update_1pot_ps(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 9 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*9 + inneriter*59); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse2_single - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse2_single - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict 
mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B,C,D refer to j loop unrolling done with SSE, e.g. for the four different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB,jnrC,jnrD; - int jnrlistA,jnrlistB,jnrlistC,jnrlistD; - int j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - real *fjptrA,*fjptrB,*fjptrC,*fjptrD; - real scratch[4*DIM]; - __m128 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D; - __m128 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp; - __m128 minushalf = _mm_set1_ps(-0.5); - real *invsqrta,*dvda,*gbtab; - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m128 dummy_mask,cutoff_mask; - __m128 signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) ); - __m128 one = _mm_set1_ps(1.0); - __m128 two = _mm_set1_ps(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_ps(fr->ic->epsfac); - charge = mdatoms->chargeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = 
_mm_set1_ps(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = jnrC = jnrD = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - j_coord_offsetC = 0; - j_coord_offsetD = 0; - - outeriter = 0; - inneriter = 0; - - for(iidx=0;iidx<4*DIM;iidx++) - { - scratch[iidx] = 0.0; - } - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx=0; jidx+=4) - { - - /* Get j neighbor index, and coordinate index */ - jnrA = jjnr[jidx]; - jnrB = jjnr[jidx+1]; - jnrC = jjnr[jidx+2]; - jnrD = jjnr[jidx+3]; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = sse2_invsqrt_f(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the 
same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); - gbeps = _mm_sub_ps(rt,_mm_cvtepi32_ps(gbitab)); - gbitab = _mm_slli_epi32(gbitab,2); - - Y = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(gbeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp)); - vgb = _mm_mul_ps(gbqqfactor,VV); - - FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00))); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00); - - fscal = felec; - - /* Calculate temporary vectorial force */ - tx = _mm_mul_ps(fscal,dx00); - ty = _mm_mul_ps(fscal,dy00); - tz = _mm_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm_add_ps(fix0,tx); - fiy0 = _mm_add_ps(fiy0,ty); - fiz0 = _mm_add_ps(fiz0,tz); - - fjptrA = f+j_coord_offsetA; - fjptrB = f+j_coord_offsetB; - fjptrC = f+j_coord_offsetC; - fjptrD = f+j_coord_offsetD; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 56 flops */ - } - - if(jidx=0) ? jnrlistA : 0; - jnrB = (jnrlistB>=0) ? jnrlistB : 0; - jnrC = (jnrlistC>=0) ? jnrlistC : 0; - jnrD = (jnrlistD>=0) ? 
jnrlistD : 0; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = sse2_invsqrt_f(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - r00 = _mm_andnot_ps(dummy_mask,r00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. 
- */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); - gbeps = _mm_sub_ps(rt,_mm_cvtepi32_ps(gbitab)); - gbitab = _mm_slli_epi32(gbitab,2); - - Y = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(gbeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp)); - vgb = _mm_mul_ps(gbqqfactor,VV); - - FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00))); - dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? dvda+jnrD : scratch; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00); - - fscal = felec; - - fscal = _mm_andnot_ps(dummy_mask,fscal); - - /* Calculate temporary vectorial force */ - tx = _mm_mul_ps(fscal,dx00); - ty = _mm_mul_ps(fscal,dy00); - tz = _mm_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm_add_ps(fix0,tx); - fiy0 = _mm_add_ps(fiy0,ty); - fiz0 = _mm_add_ps(fiz0,tz); - - fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch; - fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch; - fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch; - fjptrD = (jnrlistD>=0) ? 
f+j_coord_offsetD : scratch; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 57 flops */ - } - - /* End of innermost loop */ - - gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0, - f+i_coord_offset,fshift+i_shift_offset); - - dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0)); - gmx_mm_update_1pot_ps(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 7 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*7 + inneriter*57); -} diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_sse2_single.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_sse2_single.c index 324c1cee56..7ecc4a33da 100644 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_sse2_single.c +++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_sse2_single.c @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -51,6 +51,36 @@ nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sse2_single; nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sse2_single; nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sse2_single; nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sse2_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse2_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse2_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse2_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse2_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse2_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse2_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse2_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse2_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse2_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse2_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse2_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse2_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse2_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse2_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse2_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse2_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse2_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse2_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse2_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse2_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse2_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse2_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse2_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse2_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse2_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse2_single; +nb_kernel_t 
nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse2_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse2_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse2_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse2_single; nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sse2_single; nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sse2_single; nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sse2_single; @@ -141,72 +171,6 @@ nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sse2_single; nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sse2_single; nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sse2_single; nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sse2_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse2_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse2_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse2_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse2_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse2_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse2_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse2_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse2_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse2_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse2_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse2_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse2_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse2_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse2_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse2_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse2_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse2_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse2_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse2_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse2_single; -nb_kernel_t 
nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse2_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse2_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse2_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse2_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse2_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse2_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse2_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse2_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse2_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse2_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse2_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse2_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse2_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse2_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse2_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse2_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse2_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse2_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse2_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse2_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse2_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse2_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse2_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse2_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse2_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse2_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse2_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse2_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse2_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse2_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse2_single; 
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse2_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse2_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse2_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse2_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse2_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse2_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse2_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse2_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse2_single; -nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse2_single; -nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse2_single; -nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse2_single; -nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse2_single; -nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse2_single; -nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse2_single; nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sse2_single; nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sse2_single; nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sse2_single; @@ -277,6 +241,36 @@ nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sse2_single; nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sse2_single; nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sse2_single; nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse2_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse2_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse2_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse2_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse2_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse2_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse2_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse2_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse2_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse2_single; 
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse2_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse2_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse2_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse2_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse2_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse2_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse2_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse2_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse2_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse2_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse2_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse2_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse2_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse2_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse2_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse2_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse2_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse2_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse2_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse2_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse2_single; nb_kernel_info_t @@ -294,6 +288,36 @@ nb_kernel_info_t { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sse2_single, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sse2_single", "sse2_single", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" }, { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sse2_single, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sse2_single", "sse2_single", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sse2_single, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sse2_single", "sse2_single", "None", "None", "CubicSplineTable", 
"None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" }, + { 
nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" }, + { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse2_single", "sse2_single", 
"CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse2_single", "sse2_single", 
"CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }, { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sse2_single, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sse2_single", "sse2_single", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "PotentialAndForce" }, { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sse2_single, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sse2_single", "sse2_single", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "Force" }, { nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sse2_single, "nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sse2_single", "sse2_single", "Ewald", "None", "LJEwald", "None", "Water3Particle", "", "PotentialAndForce" }, @@ -384,72 +408,6 @@ nb_kernel_info_t { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sse2_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sse2_single", "sse2_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "Force" }, { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sse2_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sse2_single", "sse2_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "PotentialAndForce" }, { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sse2_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sse2_single", "sse2_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" }, - { 
nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", 
"PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", 
"PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse2_single, 
"nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse2_single", "sse2_single", 
"CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" }, - { 
nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" }, - { 
nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse2_single, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse2_single", "sse2_single", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse2_single, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse2_single", "sse2_single", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse2_single, "nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse2_single", "sse2_single", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse2_single, "nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse2_single", "sse2_single", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse2_single, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse2_single", "sse2_single", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse2_single, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse2_single", "sse2_single", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sse2_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" }, { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sse2_single, 
"nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" }, { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sse2_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" }, @@ -519,7 +477,37 @@ nb_kernel_info_t { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sse2_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sse2_single", "sse2_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" }, { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sse2_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sse2_single", "sse2_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" }, { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sse2_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sse2_single", "sse2_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse2_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse2_single", "sse2_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" } + { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse2_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse2_single", "sse2_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }, + { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse2_single, 
"nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" }, + { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" }, + { 
nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, 
+ { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" } }; int diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_template_sse2_single.pre b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_template_sse2_single.pre index 2ee376dcac..556215e2df 100644 --- 
a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_template_sse2_single.pre +++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_template_sse2_single.pre @@ -2,7 +2,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -131,12 +131,6 @@ void __m128 velec,felec,velecsum,facel,crf,krf,krf2; real *charge; /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC */ - __m128i gbitab; - __m128 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp; - __m128 minushalf = _mm_set1_ps(-0.5); - real *invsqrta,*dvda,*gbtab; - /* #endif */ /* #if KERNEL_VDW != 'None' */ int nvdwtype; __m128 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; @@ -145,7 +139,7 @@ void __m128 one_sixth = _mm_set1_ps(1.0/6.0); __m128 one_twelfth = _mm_set1_ps(1.0/12.0); /* #endif */ - /* #if 'Table' in KERNEL_ELEC or 'GeneralizedBorn' in KERNEL_ELEC or 'Table' in KERNEL_VDW */ + /* #if 'Table' in KERNEL_ELEC or 'Table' in KERNEL_VDW */ __m128i vfitab; __m128i ifour = _mm_set1_epi32(4); __m128 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; @@ -229,14 +223,6 @@ void /* #endif */ /* #endif */ - /* #if KERNEL_ELEC=='GeneralizedBorn' */ - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_ps(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - /* #endif */ - /* #if 'Water' in GEOMETRY_I */ /* Setup water-specific parameters */ inr = nlist->iinr[0]; @@ -370,9 +356,6 @@ void /* #for I in PARTICLES_ELEC_I */ iq{I} = 
_mm_mul_ps(facel,_mm_load1_ps(charge+inr+{I})); /* #define OUTERFLOPS OUTERFLOPS+1 */ - /* #if KERNEL_ELEC=='GeneralizedBorn' */ - isai{I} = _mm_load1_ps(invsqrta+inr+{I}); - /* #endif */ /* #endfor */ /* #for I in PARTICLES_VDW_I */ vdwioffset{I} = 2*nvdwtype*vdwtype[inr+{I}]; @@ -384,16 +367,10 @@ void /* #if KERNEL_ELEC != 'None' */ velecsum = _mm_setzero_ps(); /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC */ - vgbsum = _mm_setzero_ps(); - /* #endif */ /* #if KERNEL_VDW != 'None' */ vvdwsum = _mm_setzero_ps(); /* #endif */ /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */ - dvdasum = _mm_setzero_ps(); - /* #endif */ /* #for ROUND in ['Loop','Epilogue'] */ @@ -498,10 +475,6 @@ void /* #for J in PARTICLES_ELEC_J */ jq{J} = gmx_mm_load_4real_swizzle_ps(charge+jnrA+{J},charge+jnrB+{J}, charge+jnrC+{J},charge+jnrD+{J}); - /* #if KERNEL_ELEC=='GeneralizedBorn' */ - isaj{J} = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+{J},invsqrta+jnrB+{J}, - invsqrta+jnrC+{J},invsqrta+jnrD+{J}); - /* #endif */ /* #endfor */ /* #for J in PARTICLES_VDW_J */ vdwjidx{J}A = 2*vdwtype[jnrA+{J}]; @@ -610,63 +583,6 @@ void /* #define INNERFLOPS INNERFLOPS+3 */ /* #endif */ - /* #elif KERNEL_ELEC=='GeneralizedBorn' */ - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai{I},isaj{J}); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq{I}{J},_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - /* #define INNERFLOPS INNERFLOPS+5 */ - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. 
- */ - rt = _mm_mul_ps(r{I}{J},gbscale); - gbitab = _mm_cvttps_epi32(rt); - gbeps = _mm_sub_ps(rt,_mm_cvtepi32_ps(gbitab)); - gbitab = _mm_slli_epi32(gbitab,2); - - Y = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(gbeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp)); - vgb = _mm_mul_ps(gbqqfactor,VV); - /* #define INNERFLOPS INNERFLOPS+10 */ - - /* #if 'Force' in KERNEL_VF */ - FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r{I}{J}))); - /* #if ROUND == 'Epilogue' */ - dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp); - /* #endif */ - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - /* #if ROUND == 'Loop' */ - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - /* #else */ - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? 
dvda+jnrD : scratch; - /* #endif */ - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj{J},isaj{J}))); - /* #define INNERFLOPS INNERFLOPS+13 */ - /* #endif */ - velec = _mm_mul_ps(qq{I}{J},rinv{I}{J}); - /* #define INNERFLOPS INNERFLOPS+1 */ - /* #if 'Force' in KERNEL_VF */ - felec = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv{I}{J}),fgb),rinv{I}{J}); - /* #define INNERFLOPS INNERFLOPS+3 */ - /* #endif */ - /* #elif KERNEL_ELEC=='Ewald' */ /* EWALD ELECTROSTATICS */ @@ -913,17 +829,6 @@ void /* #endif */ velecsum = _mm_add_ps(velecsum,velec); /* #define INNERFLOPS INNERFLOPS+1 */ - /* #if KERNEL_ELEC=='GeneralizedBorn' */ - /* #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */ - vgb = _mm_and_ps(vgb,cutoff_mask); - /* #define INNERFLOPS INNERFLOPS+1 */ - /* #endif */ - /* #if ROUND == 'Epilogue' */ - vgb = _mm_andnot_ps(dummy_mask,vgb); - /* #endif */ - vgbsum = _mm_add_ps(vgbsum,vgb); - /* #define INNERFLOPS INNERFLOPS+1 */ - /* #endif */ /* #endif */ /* #if 'vdw' in INTERACTION_FLAGS[I][J] */ /* ## Note special check for TIP4P-TIP4P. 
Since we are cutting of all hydrogen interactions we also cut the LJ-only O-O interaction */ @@ -1075,19 +980,11 @@ void gmx_mm_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid); /* #define OUTERFLOPS OUTERFLOPS+1 */ /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC */ - gmx_mm_update_1pot_ps(vgbsum,kernel_data->energygrp_polarization+ggid); - /* #define OUTERFLOPS OUTERFLOPS+1 */ - /* #endif */ /* #if KERNEL_VDW != 'None' */ gmx_mm_update_1pot_ps(vvdwsum,kernel_data->energygrp_vdw+ggid); /* #define OUTERFLOPS OUTERFLOPS+1 */ /* #endif */ /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */ - dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai{I},isai{I})); - gmx_mm_update_1pot_ps(dvdasum,dvda+inr); - /* #endif */ /* Increment number of inner iterations */ inneriter += j_index_end - j_index_start; diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/make_nb_kernel_sse4_1_double.py b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/make_nb_kernel_sse4_1_double.py index ed5ac7fcae..d24ea9b0bf 100755 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/make_nb_kernel_sse4_1_double.py +++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/make_nb_kernel_sse4_1_double.py @@ -2,7 +2,7 @@ # # This file is part of the GROMACS molecular simulation package. # -# Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by +# Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, # and including many others, as listed in the AUTHORS file in the # top-level source directory and at http://www.gromacs.org. 
@@ -105,7 +105,6 @@ ElectrostaticsList = { 'None' : [], 'Coulomb' : ['rinv','rinvsq'], 'ReactionField' : ['rinv','rinvsq'], - 'GeneralizedBorn' : ['rinv','r'], 'CubicSplineTable' : ['rinv','r','table'], 'Ewald' : ['rinv','rinvsq','r'], } @@ -190,7 +189,6 @@ Abbreviation = { 'Coulomb' : 'Coul', 'Ewald' : 'Ew', 'ReactionField' : 'RF', - 'GeneralizedBorn' : 'GB', 'CubicSplineTable' : 'CSTab', 'LennardJones' : 'LJ', 'Buckingham' : 'Bham', @@ -269,7 +267,7 @@ def KeepKernel(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,Kernel return 0 # No need for LJ-only water optimization, or water optimization with implicit solvent. - if('Water' in KernelGeom[0] and (KernelElec=='None' or 'GeneralizedBorn' in KernelElec)): + if('Water' in KernelGeom[0] and KernelElec=='None'): return 0 # Non-matching table settings are pointless diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse4_1_double.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse4_1_double.c deleted file mode 100644 index 316e11ee85..0000000000 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse4_1_double.c +++ /dev/null @@ -1,838 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. 
- * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/* - * Note: this file was generated by the GROMACS sse4_1_double kernel generator. 
- */ -#include "gmxpre.h" - -#include "config.h" - -#include - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_x86_sse4_1_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse4_1_double - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: CubicSplineTable - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse4_1_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - __m128d tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128d ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - __m128d jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128d dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128d velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128d vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp; - __m128d minushalf = _mm_set1_pd(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - __m128d rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - __m128d one_sixth = _mm_set1_pd(1.0/6.0); - __m128d one_twelfth = _mm_set1_pd(1.0/12.0); - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128d rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m128d dummy_mask,cutoff_mask; - __m128d signbit = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) ); - __m128d one = _mm_set1_pd(1.0); - __m128d two = _mm_set1_pd(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_pd(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = _mm_set1_pd(kernel_data->table_vdw->scale); - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_pd(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_pd((1.0/fr->ic->epsilon_r) - 
(1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_mm_update_1pot_pd(vgbsum,kernel_data->energygrp_polarization+ggid); - gmx_mm_update_1pot_pd(vvdwsum,kernel_data->energygrp_vdw+ggid); - dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai0,isai0)); - gmx_mm_update_1pot_pd(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 10 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*92); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse4_1_double - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: CubicSplineTable - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse4_1_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - __m128d tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128d ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - __m128d jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128d dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128d velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128d vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp; - __m128d minushalf = _mm_set1_pd(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - __m128d rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - __m128d one_sixth = _mm_set1_pd(1.0/6.0); - __m128d one_twelfth = _mm_set1_pd(1.0/12.0); - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128d rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m128d dummy_mask,cutoff_mask; - __m128d signbit = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) ); - __m128d one = _mm_set1_pd(1.0); - __m128d two = _mm_set1_pd(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_pd(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = _mm_set1_pd(kernel_data->table_vdw->scale); - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_pd(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_pd((1.0/fr->ic->epsilon_r) - 
(1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_x86_sse4_1_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse4_1_double - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse4_1_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - __m128d tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128d ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - __m128d jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128d dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128d velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128d vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp; - __m128d minushalf = _mm_set1_pd(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - __m128d rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - __m128d one_sixth = _mm_set1_pd(1.0/6.0); - __m128d one_twelfth = _mm_set1_pd(1.0/12.0); - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128d rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m128d dummy_mask,cutoff_mask; - __m128d signbit = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) ); - __m128d one = _mm_set1_pd(1.0); - __m128d two = _mm_set1_pd(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_pd(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_pd(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA 
= 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_mm_update_1pot_pd(vgbsum,kernel_data->energygrp_polarization+ggid); - gmx_mm_update_1pot_pd(vvdwsum,kernel_data->energygrp_vdw+ggid); - dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai0,isai0)); - gmx_mm_update_1pot_pd(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 10 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*71); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse4_1_double - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse4_1_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - __m128d tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128d ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - __m128d jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128d dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128d velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128d vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp; - __m128d minushalf = _mm_set1_pd(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - __m128d rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - __m128d one_sixth = _mm_set1_pd(1.0/6.0); - __m128d one_twelfth = _mm_set1_pd(1.0/12.0); - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128d rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m128d dummy_mask,cutoff_mask; - __m128d signbit = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) ); - __m128d one = _mm_set1_pd(1.0); - __m128d two = _mm_set1_pd(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_pd(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_pd(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA 
= 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_x86_sse4_1_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse4_1_double - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse4_1_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - __m128d tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128d ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - __m128d jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128d dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128d velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128d vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp; - __m128d minushalf = _mm_set1_pd(-0.5); - real *invsqrta,*dvda,*gbtab; - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128d rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m128d dummy_mask,cutoff_mask; - __m128d signbit = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) ); - __m128d one = _mm_set1_pd(1.0); - __m128d two = _mm_set1_pd(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_pd(fr->ic->epsfac); - charge = mdatoms->chargeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_pd(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_mm_update_1pot_pd(vgbsum,kernel_data->energygrp_polarization+ggid); - dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai0,isai0)); - 
gmx_mm_update_1pot_pd(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 9 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*9 + inneriter*58); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse4_1_double - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse4_1_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - __m128d tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128d ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - __m128d jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128d dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128d velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128d vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp; - __m128d minushalf = _mm_set1_pd(-0.5); - real *invsqrta,*dvda,*gbtab; - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128d rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m128d dummy_mask,cutoff_mask; - __m128d signbit = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) ); - __m128d one = _mm_set1_pd(1.0); - __m128d two = _mm_set1_pd(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_pd(fr->ic->epsfac); - charge = mdatoms->chargeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_pd(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxinvsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_pd(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_pd((1.0/fr->ic->epsilon_r) - 
(1.0/fr->gb_epsilon_solvent)); - /* #endif */ - /* #if 'Water' in GEOMETRY_I */ /* Setup water-specific parameters */ inr = nlist->iinr[0]; @@ -360,9 +346,6 @@ void /* #for I in PARTICLES_ELEC_I */ iq{I} = _mm_mul_pd(facel,_mm_load1_pd(charge+inr+{I})); /* #define OUTERFLOPS OUTERFLOPS+1 */ - /* #if KERNEL_ELEC=='GeneralizedBorn' */ - isai{I} = _mm_load1_pd(invsqrta+inr+{I}); - /* #endif */ /* #endfor */ /* #for I in PARTICLES_VDW_I */ vdwioffset{I} = 2*nvdwtype*vdwtype[inr+{I}]; @@ -374,16 +357,10 @@ void /* #if KERNEL_ELEC != 'None' */ velecsum = _mm_setzero_pd(); /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC */ - vgbsum = _mm_setzero_pd(); - /* #endif */ /* #if KERNEL_VDW != 'None' */ vvdwsum = _mm_setzero_pd(); /* #endif */ /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */ - dvdasum = _mm_setzero_pd(); - /* #endif */ /* #for ROUND in ['Loop','Epilogue'] */ @@ -490,13 +467,6 @@ void /* #else */ jq{J} = _mm_load_sd(charge+jnrA+{J}); /* #endif */ - /* #if KERNEL_ELEC=='GeneralizedBorn' */ - /* #if ROUND =='Loop' */ - isaj{J} = gmx_mm_load_2real_swizzle_pd(invsqrta+jnrA+{J},invsqrta+jnrB+{J}); - /* #else */ - isaj{J} = _mm_load_sd(invsqrta+jnrA+{J}); - /* #endif */ - /* #endif */ /* #endfor */ /* #for J in PARTICLES_VDW_J */ vdwjidx{J}A = 2*vdwtype[jnrA+{J}]; @@ -603,64 +573,6 @@ void /* #define INNERFLOPS INNERFLOPS+3 */ /* #endif */ - /* #elif KERNEL_ELEC=='GeneralizedBorn' */ - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_pd(isai{I},isaj{J}); - gbqqfactor = _mm_xor_pd(signbit,_mm_mul_pd(qq{I}{J},_mm_mul_pd(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_pd(isaprod,gbtabscale); - /* #define INNERFLOPS INNERFLOPS+5 */ - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. 
- */ - rt = _mm_mul_pd(r{I}{J},gbscale); - gbitab = _mm_cvttpd_epi32(rt); - gbeps = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR)); - gbitab = _mm_slli_epi32(gbitab,2); - - Y = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) ); - /* #if ROUND == 'Loop' */ - F = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,1) ); - /* #else */ - F = _mm_setzero_pd(); - /* #endif */ - GMX_MM_TRANSPOSE2_PD(Y,F); - G = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) +2); - /* #if ROUND == 'Loop' */ - H = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,1) +2); - /* #else */ - H = _mm_setzero_pd(); - /* #endif */ - GMX_MM_TRANSPOSE2_PD(G,H); - Heps = _mm_mul_pd(gbeps,H); - Fp = _mm_add_pd(F,_mm_mul_pd(gbeps,_mm_add_pd(G,Heps))); - VV = _mm_add_pd(Y,_mm_mul_pd(gbeps,Fp)); - vgb = _mm_mul_pd(gbqqfactor,VV); - /* #define INNERFLOPS INNERFLOPS+10 */ - - /* #if 'Force' in KERNEL_VF */ - FF = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps)))); - fgb = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale)); - dvdatmp = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r{I}{J}))); - /* #if ROUND == 'Epilogue' */ - dvdatmp = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd()); - /* #endif */ - dvdasum = _mm_add_pd(dvdasum,dvdatmp); - /* #if ROUND == 'Loop' */ - gmx_mm_increment_2real_swizzle_pd(dvda+jnrA,dvda+jnrB,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj{J},isaj{J}))); - /* #else */ - gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj{J},isaj{J}))); - /* #endif */ - /* #define INNERFLOPS INNERFLOPS+13 */ - /* #endif */ - velec = _mm_mul_pd(qq{I}{J},rinv{I}{J}); - /* #define INNERFLOPS INNERFLOPS+1 */ - /* #if 'Force' in KERNEL_VF */ - felec = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(velec,rinv{I}{J}),fgb),rinv{I}{J}); - /* #define INNERFLOPS INNERFLOPS+3 */ - /* #endif */ - /* #elif KERNEL_ELEC=='Ewald' */ /* EWALD ELECTROSTATICS */ @@ -946,17 +858,6 @@ void /* #endif */ velecsum = _mm_add_pd(velecsum,velec); /* #define INNERFLOPS INNERFLOPS+1 */ - /* #if 
KERNEL_ELEC=='GeneralizedBorn' */ - /* #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */ - vgb = _mm_and_pd(vgb,cutoff_mask); - /* #define INNERFLOPS INNERFLOPS+1 */ - /* #endif */ - /* #if ROUND == 'Epilogue' */ - vgb = _mm_unpacklo_pd(vgb,_mm_setzero_pd()); - /* #endif */ - vgbsum = _mm_add_pd(vgbsum,vgb); - /* #define INNERFLOPS INNERFLOPS+1 */ - /* #endif */ /* #endif */ /* #if 'vdw' in INTERACTION_FLAGS[I][J] */ /* #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */ @@ -1097,19 +998,11 @@ void gmx_mm_update_1pot_pd(velecsum,kernel_data->energygrp_elec+ggid); /* #define OUTERFLOPS OUTERFLOPS+1 */ /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC */ - gmx_mm_update_1pot_pd(vgbsum,kernel_data->energygrp_polarization+ggid); - /* #define OUTERFLOPS OUTERFLOPS+1 */ - /* #endif */ /* #if KERNEL_VDW != 'None' */ gmx_mm_update_1pot_pd(vvdwsum,kernel_data->energygrp_vdw+ggid); /* #define OUTERFLOPS OUTERFLOPS+1 */ /* #endif */ /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */ - dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai{I},isai{I})); - gmx_mm_update_1pot_pd(dvdasum,dvda+inr); - /* #endif */ /* Increment number of inner iterations */ inneriter += j_index_end - j_index_start; diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/make_nb_kernel_sse4_1_single.py b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/make_nb_kernel_sse4_1_single.py index 1a29580a5e..f9184f4b9f 100755 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/make_nb_kernel_sse4_1_single.py +++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/make_nb_kernel_sse4_1_single.py @@ -2,7 +2,7 @@ # # This file is part of the GROMACS molecular simulation package. 
# -# Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by +# Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, # and including many others, as listed in the AUTHORS file in the # top-level source directory and at http://www.gromacs.org. @@ -105,7 +105,6 @@ ElectrostaticsList = { 'None' : [], 'Coulomb' : ['rinv','rinvsq'], 'ReactionField' : ['rinv','rinvsq'], - 'GeneralizedBorn' : ['rinv','r'], 'CubicSplineTable' : ['rinv','r','table'], 'Ewald' : ['rinv','rinvsq','r'], } @@ -190,7 +189,6 @@ Abbreviation = { 'Coulomb' : 'Coul', 'Ewald' : 'Ew', 'ReactionField' : 'RF', - 'GeneralizedBorn' : 'GB', 'CubicSplineTable' : 'CSTab', 'LennardJones' : 'LJ', 'Buckingham' : 'Bham', @@ -269,7 +267,7 @@ def KeepKernel(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,Kernel return 0 # No need for LJ-only water optimization, or water optimization with implicit solvent. - if('Water' in KernelGeom[0] and (KernelElec=='None' or 'GeneralizedBorn' in KernelElec)): + if('Water' in KernelGeom[0] and KernelElec=='None'): return 0 # Non-matching table settings are pointless diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse4_1_single.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse4_1_single.c deleted file mode 100644 index 88351ac65a..0000000000 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse4_1_single.c +++ /dev/null @@ -1,954 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. 
- * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/* - * Note: this file was generated by the GROMACS sse4_1_single kernel generator. 
- */ -#include "gmxpre.h" - -#include "config.h" - -#include - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_x86_sse4_1_single.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse4_1_single - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: CubicSplineTable - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse4_1_single - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B,C,D refer to j loop unrolling done with SSE, e.g. for the four different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB,jnrC,jnrD; - int jnrlistA,jnrlistB,jnrlistC,jnrlistD; - int j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - real *fjptrA,*fjptrB,*fjptrC,*fjptrD; - real scratch[4*DIM]; - __m128 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D; - __m128 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp; - __m128 minushalf = _mm_set1_ps(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - __m128 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - __m128 one_sixth = _mm_set1_ps(1.0/6.0); - __m128 one_twelfth = _mm_set1_ps(1.0/12.0); - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m128 dummy_mask,cutoff_mask; - __m128 signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) ); - __m128 one = _mm_set1_ps(1.0); - __m128 two = _mm_set1_ps(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_ps(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = _mm_set1_ps(kernel_data->table_vdw->scale); - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = 
_mm_set1_ps(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = jnrC = jnrD = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - j_coord_offsetC = 0; - j_coord_offsetD = 0; - - outeriter = 0; - inneriter = 0; - - for(iidx=0;iidx<4*DIM;iidx++) - { - scratch[iidx] = 0.0; - } - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx=0; jidx+=4) - { - - /* Get j neighbor index, and coordinate index */ - jnrA = jjnr[jidx]; - jnrB = jjnr[jidx+1]; - jnrC = jjnr[jidx+2]; - jnrD = jjnr[jidx+3]; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = sse41_invsqrt_f(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A, - vdwparam+vdwioffset0+vdwjidx0B, - vdwparam+vdwioffset0+vdwjidx0C, - vdwparam+vdwioffset0+vdwjidx0D, - &c6_00,&c12_00); - - /* Calculate 
table index by multiplying r with table scale and truncate to integer */ - rt = _mm_mul_ps(r00,vftabscale); - vfitab = _mm_cvttps_epi32(rt); - vfeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); - vfitab = _mm_slli_epi32(vfitab,3); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); - gbeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); - gbitab = _mm_slli_epi32(gbitab,2); - Y = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(gbeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp)); - vgb = _mm_mul_ps(gbqqfactor,VV); - - FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00))); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00); - - /* CUBIC SPLINE TABLE DISPERSION */ - Y = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,0) ); - F = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,1) ); - G = _mm_load_ps( vftab + 
gmx_mm_extract_epi32(vfitab,2) ); - H = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(vfeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(vfeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(vfeps,Fp)); - vvdw6 = _mm_mul_ps(c6_00,VV); - FF = _mm_add_ps(Fp,_mm_mul_ps(vfeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fvdw6 = _mm_mul_ps(c6_00,FF); - - /* CUBIC SPLINE TABLE REPULSION */ - vfitab = _mm_add_epi32(vfitab,ifour); - Y = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,0) ); - F = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,1) ); - G = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,2) ); - H = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(vfeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(vfeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(vfeps,Fp)); - vvdw12 = _mm_mul_ps(c12_00,VV); - FF = _mm_add_ps(Fp,_mm_mul_ps(vfeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fvdw12 = _mm_mul_ps(c12_00,FF); - vvdw = _mm_add_ps(vvdw12,vvdw6); - fvdw = _mm_xor_ps(signbit,_mm_mul_ps(_mm_add_ps(fvdw6,fvdw12),_mm_mul_ps(vftabscale,rinv00))); - - /* Update potential sum for this i atom from the interaction with this j atom. */ - velecsum = _mm_add_ps(velecsum,velec); - vgbsum = _mm_add_ps(vgbsum,vgb); - vvdwsum = _mm_add_ps(vvdwsum,vvdw); - - fscal = _mm_add_ps(felec,fvdw); - - /* Calculate temporary vectorial force */ - tx = _mm_mul_ps(fscal,dx00); - ty = _mm_mul_ps(fscal,dy00); - tz = _mm_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm_add_ps(fix0,tx); - fiy0 = _mm_add_ps(fiy0,ty); - fiz0 = _mm_add_ps(fiz0,tz); - - fjptrA = f+j_coord_offsetA; - fjptrB = f+j_coord_offsetB; - fjptrC = f+j_coord_offsetC; - fjptrD = f+j_coord_offsetD; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 92 flops */ - } - - if(jidx=0) ? jnrlistA : 0; - jnrB = (jnrlistB>=0) ? jnrlistB : 0; - jnrC = (jnrlistC>=0) ? 
jnrlistC : 0; - jnrD = (jnrlistD>=0) ? jnrlistD : 0; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = sse41_invsqrt_f(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - r00 = _mm_andnot_ps(dummy_mask,r00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A, - vdwparam+vdwioffset0+vdwjidx0B, - vdwparam+vdwioffset0+vdwjidx0C, - vdwparam+vdwioffset0+vdwjidx0D, - &c6_00,&c12_00); - - /* Calculate table index by multiplying r with table scale and truncate to integer */ - rt = _mm_mul_ps(r00,vftabscale); - vfitab = _mm_cvttps_epi32(rt); - vfeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); - vfitab = _mm_slli_epi32(vfitab,3); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the 
normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); - gbeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); - gbitab = _mm_slli_epi32(gbitab,2); - Y = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(gbeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp)); - vgb = _mm_mul_ps(gbqqfactor,VV); - - FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00))); - dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? 
dvda+jnrD : scratch; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00); - - /* CUBIC SPLINE TABLE DISPERSION */ - Y = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,0) ); - F = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,1) ); - G = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,2) ); - H = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(vfeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(vfeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(vfeps,Fp)); - vvdw6 = _mm_mul_ps(c6_00,VV); - FF = _mm_add_ps(Fp,_mm_mul_ps(vfeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fvdw6 = _mm_mul_ps(c6_00,FF); - - /* CUBIC SPLINE TABLE REPULSION */ - vfitab = _mm_add_epi32(vfitab,ifour); - Y = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,0) ); - F = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,1) ); - G = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,2) ); - H = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(vfeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(vfeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(vfeps,Fp)); - vvdw12 = _mm_mul_ps(c12_00,VV); - FF = _mm_add_ps(Fp,_mm_mul_ps(vfeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fvdw12 = _mm_mul_ps(c12_00,FF); - vvdw = _mm_add_ps(vvdw12,vvdw6); - fvdw = _mm_xor_ps(signbit,_mm_mul_ps(_mm_add_ps(fvdw6,fvdw12),_mm_mul_ps(vftabscale,rinv00))); - - /* Update potential sum for this i atom from the interaction with this j atom. 
*/ - velec = _mm_andnot_ps(dummy_mask,velec); - velecsum = _mm_add_ps(velecsum,velec); - vgb = _mm_andnot_ps(dummy_mask,vgb); - vgbsum = _mm_add_ps(vgbsum,vgb); - vvdw = _mm_andnot_ps(dummy_mask,vvdw); - vvdwsum = _mm_add_ps(vvdwsum,vvdw); - - fscal = _mm_add_ps(felec,fvdw); - - fscal = _mm_andnot_ps(dummy_mask,fscal); - - /* Calculate temporary vectorial force */ - tx = _mm_mul_ps(fscal,dx00); - ty = _mm_mul_ps(fscal,dy00); - tz = _mm_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm_add_ps(fix0,tx); - fiy0 = _mm_add_ps(fiy0,ty); - fiz0 = _mm_add_ps(fiz0,tz); - - fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch; - fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch; - fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch; - fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 93 flops */ - } - - /* End of innermost loop */ - - gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0, - f+i_coord_offset,fshift+i_shift_offset); - - ggid = gid[iidx]; - /* Update potential energies */ - gmx_mm_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid); - gmx_mm_update_1pot_ps(vgbsum,kernel_data->energygrp_polarization+ggid); - gmx_mm_update_1pot_ps(vvdwsum,kernel_data->energygrp_vdw+ggid); - dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0)); - gmx_mm_update_1pot_ps(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 10 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*93); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse4_1_single - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: CubicSplineTable - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void 
-nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse4_1_single - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B,C,D refer to j loop unrolling done with SSE, e.g. for the four different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB,jnrC,jnrD; - int jnrlistA,jnrlistB,jnrlistC,jnrlistD; - int j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - real *fjptrA,*fjptrB,*fjptrC,*fjptrD; - real scratch[4*DIM]; - __m128 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D; - __m128 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp; - __m128 minushalf = _mm_set1_ps(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - __m128 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - __m128 one_sixth = _mm_set1_ps(1.0/6.0); - __m128 one_twelfth = _mm_set1_ps(1.0/12.0); - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m128 dummy_mask,cutoff_mask; - __m128 signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) ); - 
__m128 one = _mm_set1_ps(1.0); - __m128 two = _mm_set1_ps(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_ps(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = _mm_set1_ps(kernel_data->table_vdw->scale); - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_ps(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = jnrC = jnrD = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - j_coord_offsetC = 0; - j_coord_offsetD = 0; - - outeriter = 0; - inneriter = 0; - - for(iidx=0;iidx<4*DIM;iidx++) - { - scratch[iidx] = 0.0; - } - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx=0; jidx+=4) - { - - /* Get j neighbor index, and coordinate index */ - jnrA = jjnr[jidx]; - jnrB = jjnr[jidx+1]; - jnrC = jjnr[jidx+2]; - jnrD = jjnr[jidx+3]; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = sse41_invsqrt_f(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - 
invsqrta+jnrC+0,invsqrta+jnrD+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A, - vdwparam+vdwioffset0+vdwjidx0B, - vdwparam+vdwioffset0+vdwjidx0C, - vdwparam+vdwioffset0+vdwjidx0D, - &c6_00,&c12_00); - - /* Calculate table index by multiplying r with table scale and truncate to integer */ - rt = _mm_mul_ps(r00,vftabscale); - vfitab = _mm_cvttps_epi32(rt); - vfeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); - vfitab = _mm_slli_epi32(vfitab,3); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. 
- */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); - gbeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); - gbitab = _mm_slli_epi32(gbitab,2); - Y = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(gbeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp)); - vgb = _mm_mul_ps(gbqqfactor,VV); - - FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00))); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00); - - /* CUBIC SPLINE TABLE DISPERSION */ - Y = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,0) ); - F = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,1) ); - G = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,2) ); - H = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(vfeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(vfeps,_mm_add_ps(G,Heps))); - FF = _mm_add_ps(Fp,_mm_mul_ps(vfeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fvdw6 = _mm_mul_ps(c6_00,FF); - - /* CUBIC SPLINE TABLE REPULSION */ - vfitab = _mm_add_epi32(vfitab,ifour); - Y = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,0) ); - F = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,1) ); - G = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,2) ); - H = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,3) ); - 
_MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(vfeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(vfeps,_mm_add_ps(G,Heps))); - FF = _mm_add_ps(Fp,_mm_mul_ps(vfeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fvdw12 = _mm_mul_ps(c12_00,FF); - fvdw = _mm_xor_ps(signbit,_mm_mul_ps(_mm_add_ps(fvdw6,fvdw12),_mm_mul_ps(vftabscale,rinv00))); - - fscal = _mm_add_ps(felec,fvdw); - - /* Calculate temporary vectorial force */ - tx = _mm_mul_ps(fscal,dx00); - ty = _mm_mul_ps(fscal,dy00); - tz = _mm_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm_add_ps(fix0,tx); - fiy0 = _mm_add_ps(fiy0,ty); - fiz0 = _mm_add_ps(fiz0,tz); - - fjptrA = f+j_coord_offsetA; - fjptrB = f+j_coord_offsetB; - fjptrC = f+j_coord_offsetC; - fjptrD = f+j_coord_offsetD; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 82 flops */ - } - - if(jidx=0) ? jnrlistA : 0; - jnrB = (jnrlistB>=0) ? jnrlistB : 0; - jnrC = (jnrlistC>=0) ? jnrlistC : 0; - jnrD = (jnrlistD>=0) ? jnrlistD : 0; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = sse41_invsqrt_f(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - - /************************** - * CALCULATE 
INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - r00 = _mm_andnot_ps(dummy_mask,r00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A, - vdwparam+vdwioffset0+vdwjidx0B, - vdwparam+vdwioffset0+vdwjidx0C, - vdwparam+vdwioffset0+vdwjidx0D, - &c6_00,&c12_00); - - /* Calculate table index by multiplying r with table scale and truncate to integer */ - rt = _mm_mul_ps(r00,vftabscale); - vfitab = _mm_cvttps_epi32(rt); - vfeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); - vfitab = _mm_slli_epi32(vfitab,3); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. 
- */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); - gbeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); - gbitab = _mm_slli_epi32(gbitab,2); - Y = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(gbeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp)); - vgb = _mm_mul_ps(gbqqfactor,VV); - - FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00))); - dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? 
dvda+jnrD : scratch; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00); - - /* CUBIC SPLINE TABLE DISPERSION */ - Y = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,0) ); - F = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,1) ); - G = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,2) ); - H = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(vfeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(vfeps,_mm_add_ps(G,Heps))); - FF = _mm_add_ps(Fp,_mm_mul_ps(vfeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fvdw6 = _mm_mul_ps(c6_00,FF); - - /* CUBIC SPLINE TABLE REPULSION */ - vfitab = _mm_add_epi32(vfitab,ifour); - Y = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,0) ); - F = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,1) ); - G = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,2) ); - H = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(vfeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(vfeps,_mm_add_ps(G,Heps))); - FF = _mm_add_ps(Fp,_mm_mul_ps(vfeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fvdw12 = _mm_mul_ps(c12_00,FF); - fvdw = _mm_xor_ps(signbit,_mm_mul_ps(_mm_add_ps(fvdw6,fvdw12),_mm_mul_ps(vftabscale,rinv00))); - - fscal = _mm_add_ps(felec,fvdw); - - fscal = _mm_andnot_ps(dummy_mask,fscal); - - /* Calculate temporary vectorial force */ - tx = _mm_mul_ps(fscal,dx00); - ty = _mm_mul_ps(fscal,dy00); - tz = _mm_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm_add_ps(fix0,tx); - fiy0 = _mm_add_ps(fiy0,ty); - fiz0 = _mm_add_ps(fiz0,tz); - - fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch; - fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch; - fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch; - fjptrD = (jnrlistD>=0) ? 
f+j_coord_offsetD : scratch; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 83 flops */ - } - - /* End of innermost loop */ - - gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0, - f+i_coord_offset,fshift+i_shift_offset); - - dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0)); - gmx_mm_update_1pot_ps(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 7 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*83); -} diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sse4_1_single.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sse4_1_single.c deleted file mode 100644 index 8437ac268d..0000000000 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sse4_1_single.c +++ /dev/null @@ -1,852 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/* - * Note: this file was generated by the GROMACS sse4_1_single kernel generator. - */ -#include "gmxpre.h" - -#include "config.h" - -#include - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_x86_sse4_1_single.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse4_1_single - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse4_1_single - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B,C,D refer to j loop unrolling done with SSE, e.g. for the four different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB,jnrC,jnrD; - int jnrlistA,jnrlistB,jnrlistC,jnrlistD; - int j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - real *fjptrA,*fjptrB,*fjptrC,*fjptrD; - real scratch[4*DIM]; - __m128 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D; - __m128 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp; - __m128 minushalf = _mm_set1_ps(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - __m128 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - __m128 one_sixth = _mm_set1_ps(1.0/6.0); - __m128 one_twelfth = _mm_set1_ps(1.0/12.0); - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m128 dummy_mask,cutoff_mask; - __m128 signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) ); - __m128 one = _mm_set1_ps(1.0); - __m128 two = _mm_set1_ps(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_ps(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_ps(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_ps((1.0/fr->ic->epsilon_r) 
- (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = jnrC = jnrD = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - j_coord_offsetC = 0; - j_coord_offsetD = 0; - - outeriter = 0; - inneriter = 0; - - for(iidx=0;iidx<4*DIM;iidx++) - { - scratch[iidx] = 0.0; - } - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx=0; jidx+=4) - { - - /* Get j neighbor index, and coordinate index */ - jnrA = jjnr[jidx]; - jnrB = jjnr[jidx+1]; - jnrC = jjnr[jidx+2]; - jnrD = jjnr[jidx+3]; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = sse41_invsqrt_f(rsq00); - - rinvsq00 = _mm_mul_ps(rinv00,rinv00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A, - vdwparam+vdwioffset0+vdwjidx0B, - vdwparam+vdwioffset0+vdwjidx0C, - vdwparam+vdwioffset0+vdwjidx0D, - &c6_00,&c12_00); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = 
_mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); - gbeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); - gbitab = _mm_slli_epi32(gbitab,2); - Y = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(gbeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp)); - vgb = _mm_mul_ps(gbqqfactor,VV); - - FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00))); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00); - - /* LENNARD-JONES DISPERSION/REPULSION */ - - rinvsix = _mm_mul_ps(_mm_mul_ps(rinvsq00,rinvsq00),rinvsq00); - vvdw6 = _mm_mul_ps(c6_00,rinvsix); - vvdw12 = _mm_mul_ps(c12_00,_mm_mul_ps(rinvsix,rinvsix)); - vvdw = _mm_sub_ps( _mm_mul_ps(vvdw12,one_twelfth) , _mm_mul_ps(vvdw6,one_sixth) ); - fvdw = _mm_mul_ps(_mm_sub_ps(vvdw12,vvdw6),rinvsq00); - - /* Update potential sum for this i atom from the interaction with this j atom. 
*/ - velecsum = _mm_add_ps(velecsum,velec); - vgbsum = _mm_add_ps(vgbsum,vgb); - vvdwsum = _mm_add_ps(vvdwsum,vvdw); - - fscal = _mm_add_ps(felec,fvdw); - - /* Calculate temporary vectorial force */ - tx = _mm_mul_ps(fscal,dx00); - ty = _mm_mul_ps(fscal,dy00); - tz = _mm_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm_add_ps(fix0,tx); - fiy0 = _mm_add_ps(fiy0,ty); - fiz0 = _mm_add_ps(fiz0,tz); - - fjptrA = f+j_coord_offsetA; - fjptrB = f+j_coord_offsetB; - fjptrC = f+j_coord_offsetC; - fjptrD = f+j_coord_offsetD; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 71 flops */ - } - - if(jidx=0) ? jnrlistA : 0; - jnrB = (jnrlistB>=0) ? jnrlistB : 0; - jnrC = (jnrlistC>=0) ? jnrlistC : 0; - jnrD = (jnrlistD>=0) ? jnrlistD : 0; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = sse41_invsqrt_f(rsq00); - - rinvsq00 = _mm_mul_ps(rinv00,rinv00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - r00 = _mm_andnot_ps(dummy_mask,r00); - - /* Compute parameters for interactions 
between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A, - vdwparam+vdwioffset0+vdwjidx0B, - vdwparam+vdwioffset0+vdwjidx0C, - vdwparam+vdwioffset0+vdwjidx0D, - &c6_00,&c12_00); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); - gbeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); - gbitab = _mm_slli_epi32(gbitab,2); - Y = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(gbeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp)); - vgb = _mm_mul_ps(gbqqfactor,VV); - - FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00))); - dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? 
dvda+jnrD : scratch; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00); - - /* LENNARD-JONES DISPERSION/REPULSION */ - - rinvsix = _mm_mul_ps(_mm_mul_ps(rinvsq00,rinvsq00),rinvsq00); - vvdw6 = _mm_mul_ps(c6_00,rinvsix); - vvdw12 = _mm_mul_ps(c12_00,_mm_mul_ps(rinvsix,rinvsix)); - vvdw = _mm_sub_ps( _mm_mul_ps(vvdw12,one_twelfth) , _mm_mul_ps(vvdw6,one_sixth) ); - fvdw = _mm_mul_ps(_mm_sub_ps(vvdw12,vvdw6),rinvsq00); - - /* Update potential sum for this i atom from the interaction with this j atom. */ - velec = _mm_andnot_ps(dummy_mask,velec); - velecsum = _mm_add_ps(velecsum,velec); - vgb = _mm_andnot_ps(dummy_mask,vgb); - vgbsum = _mm_add_ps(vgbsum,vgb); - vvdw = _mm_andnot_ps(dummy_mask,vvdw); - vvdwsum = _mm_add_ps(vvdwsum,vvdw); - - fscal = _mm_add_ps(felec,fvdw); - - fscal = _mm_andnot_ps(dummy_mask,fscal); - - /* Calculate temporary vectorial force */ - tx = _mm_mul_ps(fscal,dx00); - ty = _mm_mul_ps(fscal,dy00); - tz = _mm_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm_add_ps(fix0,tx); - fiy0 = _mm_add_ps(fiy0,ty); - fiz0 = _mm_add_ps(fiz0,tz); - - fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch; - fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch; - fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch; - fjptrD = (jnrlistD>=0) ? 
f+j_coord_offsetD : scratch; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 72 flops */ - } - - /* End of innermost loop */ - - gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0, - f+i_coord_offset,fshift+i_shift_offset); - - ggid = gid[iidx]; - /* Update potential energies */ - gmx_mm_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid); - gmx_mm_update_1pot_ps(vgbsum,kernel_data->energygrp_polarization+ggid); - gmx_mm_update_1pot_ps(vvdwsum,kernel_data->energygrp_vdw+ggid); - dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0)); - gmx_mm_update_1pot_ps(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 10 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*72); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse4_1_single - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse4_1_single - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B,C,D refer to j loop unrolling done with SSE, e.g. for the four different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB,jnrC,jnrD; - int jnrlistA,jnrlistB,jnrlistC,jnrlistD; - int j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - real *fjptrA,*fjptrB,*fjptrC,*fjptrD; - real scratch[4*DIM]; - __m128 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D; - __m128 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp; - __m128 minushalf = _mm_set1_ps(-0.5); - real *invsqrta,*dvda,*gbtab; - int nvdwtype; - __m128 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - __m128 one_sixth = _mm_set1_ps(1.0/6.0); - __m128 one_twelfth = _mm_set1_ps(1.0/12.0); - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m128 dummy_mask,cutoff_mask; - __m128 signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) ); - __m128 one = _mm_set1_ps(1.0); - __m128 two = _mm_set1_ps(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_ps(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_ps(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_ps((1.0/fr->ic->epsilon_r) 
- (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = jnrC = jnrD = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - j_coord_offsetC = 0; - j_coord_offsetD = 0; - - outeriter = 0; - inneriter = 0; - - for(iidx=0;iidx<4*DIM;iidx++) - { - scratch[iidx] = 0.0; - } - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx=0; jidx+=4) - { - - /* Get j neighbor index, and coordinate index */ - jnrA = jjnr[jidx]; - jnrB = jjnr[jidx+1]; - jnrC = jjnr[jidx+2]; - jnrD = jjnr[jidx+3]; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = sse41_invsqrt_f(rsq00); - - rinvsq00 = _mm_mul_ps(rinv00,rinv00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A, - vdwparam+vdwioffset0+vdwjidx0B, - vdwparam+vdwioffset0+vdwjidx0C, - vdwparam+vdwioffset0+vdwjidx0D, - &c6_00,&c12_00); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = 
_mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); - gbeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); - gbitab = _mm_slli_epi32(gbitab,2); - Y = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(gbeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp)); - vgb = _mm_mul_ps(gbqqfactor,VV); - - FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00))); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00); - - /* LENNARD-JONES DISPERSION/REPULSION */ - - rinvsix = _mm_mul_ps(_mm_mul_ps(rinvsq00,rinvsq00),rinvsq00); - fvdw = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(c12_00,rinvsix),c6_00),_mm_mul_ps(rinvsix,rinvsq00)); - - fscal = _mm_add_ps(felec,fvdw); - - /* Calculate temporary vectorial force */ - tx = _mm_mul_ps(fscal,dx00); - ty = _mm_mul_ps(fscal,dy00); - tz = _mm_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm_add_ps(fix0,tx); - fiy0 = _mm_add_ps(fiy0,ty); - fiz0 = 
_mm_add_ps(fiz0,tz); - - fjptrA = f+j_coord_offsetA; - fjptrB = f+j_coord_offsetB; - fjptrC = f+j_coord_offsetC; - fjptrD = f+j_coord_offsetD; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 64 flops */ - } - - if(jidx=0) ? jnrlistA : 0; - jnrB = (jnrlistB>=0) ? jnrlistB : 0; - jnrC = (jnrlistC>=0) ? jnrlistC : 0; - jnrD = (jnrlistD>=0) ? jnrlistD : 0; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = sse41_invsqrt_f(rsq00); - - rinvsq00 = _mm_mul_ps(rinv00,rinv00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - vdwjidx0A = 2*vdwtype[jnrA+0]; - vdwjidx0B = 2*vdwtype[jnrB+0]; - vdwjidx0C = 2*vdwtype[jnrC+0]; - vdwjidx0D = 2*vdwtype[jnrD+0]; - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - r00 = _mm_andnot_ps(dummy_mask,r00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A, - vdwparam+vdwioffset0+vdwjidx0B, - vdwparam+vdwioffset0+vdwjidx0C, - vdwparam+vdwioffset0+vdwjidx0D, - &c6_00,&c12_00); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai0,isaj0); - gbqqfactor = 
_mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); - gbeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); - gbitab = _mm_slli_epi32(gbitab,2); - Y = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(gbeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp)); - vgb = _mm_mul_ps(gbqqfactor,VV); - - FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00))); - dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? 
dvda+jnrD : scratch; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00); - - /* LENNARD-JONES DISPERSION/REPULSION */ - - rinvsix = _mm_mul_ps(_mm_mul_ps(rinvsq00,rinvsq00),rinvsq00); - fvdw = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(c12_00,rinvsix),c6_00),_mm_mul_ps(rinvsix,rinvsq00)); - - fscal = _mm_add_ps(felec,fvdw); - - fscal = _mm_andnot_ps(dummy_mask,fscal); - - /* Calculate temporary vectorial force */ - tx = _mm_mul_ps(fscal,dx00); - ty = _mm_mul_ps(fscal,dy00); - tz = _mm_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm_add_ps(fix0,tx); - fiy0 = _mm_add_ps(fiy0,ty); - fiz0 = _mm_add_ps(fiz0,tz); - - fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch; - fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch; - fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch; - fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 65 flops */ - } - - /* End of innermost loop */ - - gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0, - f+i_coord_offset,fshift+i_shift_offset); - - dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0)); - gmx_mm_update_1pot_ps(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 7 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*65); -} diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_sse4_1_single.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_sse4_1_single.c deleted file mode 100644 index 3b150dddcb..0000000000 --- 
a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_sse4_1_single.c +++ /dev/null @@ -1,757 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/* - * Note: this file was generated by the GROMACS sse4_1_single kernel generator. 
- */ -#include "gmxpre.h" - -#include "config.h" - -#include - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_x86_sse4_1_single.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse4_1_single - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse4_1_single - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B,C,D refer to j loop unrolling done with SSE, e.g. for the four different - * jnr indices corresponding to data put in the four positions in the SIMD register. 
- */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB,jnrC,jnrD; - int jnrlistA,jnrlistB,jnrlistC,jnrlistD; - int j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - real *fjptrA,*fjptrB,*fjptrC,*fjptrD; - real scratch[4*DIM]; - __m128 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D; - __m128 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp; - __m128 minushalf = _mm_set1_ps(-0.5); - real *invsqrta,*dvda,*gbtab; - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m128 dummy_mask,cutoff_mask; - __m128 signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) ); - __m128 one = _mm_set1_ps(1.0); - __m128 two = _mm_set1_ps(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_ps(fr->ic->epsfac); - charge = mdatoms->chargeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_ps(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = jnrC = jnrD = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - j_coord_offsetC = 0; - j_coord_offsetD = 0; - - outeriter = 0; - inneriter = 0; - - for(iidx=0;iidx<4*DIM;iidx++) - { - scratch[iidx] = 0.0; 
- } - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx=0; jidx+=4) - { - - /* Get j neighbor index, and coordinate index */ - jnrA = jjnr[jidx]; - jnrB = jjnr[jidx+1]; - jnrC = jjnr[jidx+2]; - jnrD = jjnr[jidx+3]; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = sse41_invsqrt_f(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. 
- */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); - gbeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); - gbitab = _mm_slli_epi32(gbitab,2); - Y = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(gbeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp)); - vgb = _mm_mul_ps(gbqqfactor,VV); - - FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00))); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00); - - /* Update potential sum for this i atom from the interaction with this j atom. */ - velecsum = _mm_add_ps(velecsum,velec); - vgbsum = _mm_add_ps(vgbsum,vgb); - - fscal = felec; - - /* Calculate temporary vectorial force */ - tx = _mm_mul_ps(fscal,dx00); - ty = _mm_mul_ps(fscal,dy00); - tz = _mm_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm_add_ps(fix0,tx); - fiy0 = _mm_add_ps(fiy0,ty); - fiz0 = _mm_add_ps(fiz0,tz); - - fjptrA = f+j_coord_offsetA; - fjptrB = f+j_coord_offsetB; - fjptrC = f+j_coord_offsetC; - fjptrD = f+j_coord_offsetD; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 58 flops */ - } - - if(jidx=0) ? jnrlistA : 0; - jnrB = (jnrlistB>=0) ? jnrlistB : 0; - jnrC = (jnrlistC>=0) ? jnrlistC : 0; - jnrD = (jnrlistD>=0) ? 
jnrlistD : 0; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = sse41_invsqrt_f(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - r00 = _mm_andnot_ps(dummy_mask,r00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. 
- */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); - gbeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); - gbitab = _mm_slli_epi32(gbitab,2); - Y = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(gbeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp)); - vgb = _mm_mul_ps(gbqqfactor,VV); - - FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00))); - dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? dvda+jnrD : scratch; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00); - - /* Update potential sum for this i atom from the interaction with this j atom. 
*/ - velec = _mm_andnot_ps(dummy_mask,velec); - velecsum = _mm_add_ps(velecsum,velec); - vgb = _mm_andnot_ps(dummy_mask,vgb); - vgbsum = _mm_add_ps(vgbsum,vgb); - - fscal = felec; - - fscal = _mm_andnot_ps(dummy_mask,fscal); - - /* Calculate temporary vectorial force */ - tx = _mm_mul_ps(fscal,dx00); - ty = _mm_mul_ps(fscal,dy00); - tz = _mm_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm_add_ps(fix0,tx); - fiy0 = _mm_add_ps(fiy0,ty); - fiz0 = _mm_add_ps(fiz0,tz); - - fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch; - fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch; - fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch; - fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 59 flops */ - } - - /* End of innermost loop */ - - gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0, - f+i_coord_offset,fshift+i_shift_offset); - - ggid = gid[iidx]; - /* Update potential energies */ - gmx_mm_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid); - gmx_mm_update_1pot_ps(vgbsum,kernel_data->energygrp_polarization+ggid); - dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0)); - gmx_mm_update_1pot_ps(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 9 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*9 + inneriter*59); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse4_1_single - * Electrostatics interaction: GeneralizedBorn - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse4_1_single - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * 
gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B,C,D refer to j loop unrolling done with SSE, e.g. for the four different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB,jnrC,jnrD; - int jnrlistA,jnrlistB,jnrlistC,jnrlistD; - int j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - real *fjptrA,*fjptrB,*fjptrC,*fjptrD; - real scratch[4*DIM]; - __m128 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - __m128 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D; - __m128 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - __m128 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - __m128 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - __m128i gbitab; - __m128 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp; - __m128 minushalf = _mm_set1_ps(-0.5); - real *invsqrta,*dvda,*gbtab; - __m128i vfitab; - __m128i ifour = _mm_set1_epi32(4); - __m128 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; - real *vftab; - __m128 dummy_mask,cutoff_mask; - __m128 signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) ); - __m128 one = _mm_set1_ps(1.0); - __m128 two = _mm_set1_ps(2.0); - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = _mm_set1_ps(fr->ic->epsfac); - charge = mdatoms->chargeA; - - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = 
_mm_set1_ps(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = jnrC = jnrD = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - j_coord_offsetC = 0; - j_coord_offsetD = 0; - - outeriter = 0; - inneriter = 0; - - for(iidx=0;iidx<4*DIM;iidx++) - { - scratch[iidx] = 0.0; - } - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx=0; jidx+=4) - { - - /* Get j neighbor index, and coordinate index */ - jnrA = jjnr[jidx]; - jnrB = jjnr[jidx+1]; - jnrC = jjnr[jidx+2]; - jnrD = jjnr[jidx+3]; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = sse41_invsqrt_f(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the 
same procedure by multiplying r with scale and truncating to integer. - */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); - gbeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); - gbitab = _mm_slli_epi32(gbitab,2); - Y = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(gbeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp)); - vgb = _mm_mul_ps(gbqqfactor,VV); - - FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00))); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00); - - fscal = felec; - - /* Calculate temporary vectorial force */ - tx = _mm_mul_ps(fscal,dx00); - ty = _mm_mul_ps(fscal,dy00); - tz = _mm_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm_add_ps(fix0,tx); - fiy0 = _mm_add_ps(fiy0,ty); - fiz0 = _mm_add_ps(fiz0,tz); - - fjptrA = f+j_coord_offsetA; - fjptrB = f+j_coord_offsetB; - fjptrC = f+j_coord_offsetC; - fjptrD = f+j_coord_offsetD; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 56 flops */ - } - - if(jidx=0) ? jnrlistA : 0; - jnrB = (jnrlistB>=0) ? jnrlistB : 0; - jnrC = (jnrlistC>=0) ? jnrlistC : 0; - jnrD = (jnrlistD>=0) ? 
jnrlistD : 0; - j_coord_offsetA = DIM*jnrA; - j_coord_offsetB = DIM*jnrB; - j_coord_offsetC = DIM*jnrC; - j_coord_offsetD = DIM*jnrD; - - /* load j atom coordinates */ - gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB, - x+j_coord_offsetC,x+j_coord_offsetD, - &jx0,&jy0,&jz0); - - /* Calculate displacement vector */ - dx00 = _mm_sub_ps(ix0,jx0); - dy00 = _mm_sub_ps(iy0,jy0); - dz00 = _mm_sub_ps(iz0,jz0); - - /* Calculate squared distance and things based on it */ - rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00); - - rinv00 = sse41_invsqrt_f(rsq00); - - /* Load parameters for j particles */ - jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0, - charge+jnrC+0,charge+jnrD+0); - isaj0 = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0, - invsqrta+jnrC+0,invsqrta+jnrD+0); - - /************************** - * CALCULATE INTERACTIONS * - **************************/ - - r00 = _mm_mul_ps(rsq00,rinv00); - r00 = _mm_andnot_ps(dummy_mask,r00); - - /* Compute parameters for interactions between i and j atoms */ - qq00 = _mm_mul_ps(iq0,jq0); - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai0,isaj0); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. 
- */ - rt = _mm_mul_ps(r00,gbscale); - gbitab = _mm_cvttps_epi32(rt); - gbeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); - gbitab = _mm_slli_epi32(gbitab,2); - Y = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(gbeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp)); - vgb = _mm_mul_ps(gbqqfactor,VV); - - FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00))); - dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp); - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? dvda+jnrD : scratch; - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0))); - velec = _mm_mul_ps(qq00,rinv00); - felec = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00); - - fscal = felec; - - fscal = _mm_andnot_ps(dummy_mask,fscal); - - /* Calculate temporary vectorial force */ - tx = _mm_mul_ps(fscal,dx00); - ty = _mm_mul_ps(fscal,dy00); - tz = _mm_mul_ps(fscal,dz00); - - /* Update vectorial force */ - fix0 = _mm_add_ps(fix0,tx); - fiy0 = _mm_add_ps(fiy0,ty); - fiz0 = _mm_add_ps(fiz0,tz); - - fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch; - fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch; - fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch; - fjptrD = (jnrlistD>=0) ? 
f+j_coord_offsetD : scratch; - gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz); - - /* Inner loop uses 57 flops */ - } - - /* End of innermost loop */ - - gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0, - f+i_coord_offset,fshift+i_shift_offset); - - dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0)); - gmx_mm_update_1pot_ps(dvdasum,dvda+inr); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 7 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*7 + inneriter*57); -} diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.c index e2f9d452a5..c339914a02 100644 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.c +++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.c @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -51,6 +51,36 @@ nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sse4_1_single; nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sse4_1_single; nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sse4_1_single; nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sse4_1_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse4_1_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse4_1_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse4_1_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse4_1_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse4_1_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse4_1_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse4_1_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse4_1_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse4_1_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse4_1_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse4_1_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse4_1_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse4_1_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse4_1_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse4_1_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse4_1_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse4_1_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse4_1_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse4_1_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse4_1_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse4_1_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse4_1_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse4_1_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse4_1_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse4_1_single; +nb_kernel_t 
nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse4_1_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse4_1_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse4_1_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse4_1_single; +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse4_1_single; nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sse4_1_single; nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sse4_1_single; nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sse4_1_single; @@ -141,72 +171,6 @@ nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sse4_1_single; nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sse4_1_single; nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sse4_1_single; nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sse4_1_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse4_1_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse4_1_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse4_1_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse4_1_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse4_1_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse4_1_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse4_1_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse4_1_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse4_1_single; -nb_kernel_t 
nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse4_1_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse4_1_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse4_1_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse4_1_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse4_1_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse4_1_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse4_1_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse4_1_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse4_1_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse4_1_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse4_1_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse4_1_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse4_1_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse4_1_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse4_1_single; 
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse4_1_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse4_1_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse4_1_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse4_1_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse4_1_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse4_1_single; -nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse4_1_single; -nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse4_1_single; -nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse4_1_single; -nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse4_1_single; nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sse4_1_single; nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sse4_1_single; nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sse4_1_single; @@ -277,6 +241,36 @@ nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sse4_1_single; nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sse4_1_single; nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sse4_1_single; nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse4_1_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse4_1_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse4_1_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse4_1_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse4_1_single; +nb_kernel_t 
nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse4_1_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse4_1_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse4_1_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse4_1_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse4_1_single; +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse4_1_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse4_1_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse4_1_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse4_1_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse4_1_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse4_1_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse4_1_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse4_1_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse4_1_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse4_1_single; +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse4_1_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse4_1_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse4_1_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse4_1_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse4_1_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse4_1_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse4_1_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse4_1_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse4_1_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse4_1_single; +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse4_1_single; nb_kernel_info_t @@ -294,6 +288,36 @@ nb_kernel_info_t { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sse4_1_single", "sse4_1_single", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" }, { 
nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sse4_1_single", "sse4_1_single", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" }, + { 
nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" }, + { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse4_1_single, 
"nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" }, + { 
nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }, { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "PotentialAndForce" }, { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sse4_1_single", "sse4_1_single", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "Force" }, { nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "Ewald", "None", "LJEwald", "None", "Water3Particle", "", "PotentialAndForce" }, @@ -384,72 +408,6 @@ nb_kernel_info_t { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sse4_1_single", "sse4_1_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "Force" }, { 
nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "PotentialAndForce" }, { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sse4_1_single", "sse4_1_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse4_1_single", 
"sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse4_1_single, 
"nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", 
"None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", 
"None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" }, - { 
nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" }, - { 
nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }, - { nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse4_1_single", "sse4_1_single", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", 
"PotentialAndForce" }, - { nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse4_1_single", "sse4_1_single", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "Force" }, - { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, - { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse4_1_single", "sse4_1_single", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" }, { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" }, { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" }, @@ -519,7 +477,37 @@ nb_kernel_info_t { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" }, { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sse4_1_single", "sse4_1_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" }, { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sse4_1_single", "sse4_1_single", 
"ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" }, - { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse4_1_single", "sse4_1_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" } + { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse4_1_single", "sse4_1_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }, + { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" }, + { 
nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" }, + { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" }, + { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", 
"Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" }, + { 
nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" }, + { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" } }; int diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_template_sse4_1_single.pre b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_template_sse4_1_single.pre index f66ab5e5da..39050e6121 100644 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_template_sse4_1_single.pre +++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_template_sse4_1_single.pre @@ -2,7 +2,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -131,12 +131,6 @@ void __m128 velec,felec,velecsum,facel,crf,krf,krf2; real *charge; /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC */ - __m128i gbitab; - __m128 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp; - __m128 minushalf = _mm_set1_ps(-0.5); - real *invsqrta,*dvda,*gbtab; - /* #endif */ /* #if KERNEL_VDW != 'None' */ int nvdwtype; __m128 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; @@ -145,7 +139,7 @@ void __m128 one_sixth = _mm_set1_ps(1.0/6.0); __m128 one_twelfth = _mm_set1_ps(1.0/12.0); /* #endif */ - /* #if 'Table' in KERNEL_ELEC or 'GeneralizedBorn' in KERNEL_ELEC or 'Table' in KERNEL_VDW */ + /* #if 'Table' in KERNEL_ELEC or 'Table' in KERNEL_VDW */ __m128i vfitab; __m128i ifour = _mm_set1_epi32(4); __m128 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF; @@ -229,14 +223,6 @@ void /* #endif */ /* #endif */ - /* #if KERNEL_ELEC=='GeneralizedBorn' */ - invsqrta = fr->invsqrta; - dvda = fr->dvda; - gbtabscale = _mm_set1_ps(fr->gbtab->scale); - gbtab = fr->gbtab->data; - gbinvepsdiff = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); - /* #endif */ - /* #if 'Water' in GEOMETRY_I */ /* Setup water-specific parameters */ inr = nlist->iinr[0]; @@ -370,9 +356,6 @@ void /* #for I in PARTICLES_ELEC_I */ iq{I} = _mm_mul_ps(facel,_mm_load1_ps(charge+inr+{I})); /* #define OUTERFLOPS OUTERFLOPS+1 */ - /* #if KERNEL_ELEC=='GeneralizedBorn' */ - isai{I} = _mm_load1_ps(invsqrta+inr+{I}); - /* #endif */ /* #endfor */ /* #for I in PARTICLES_VDW_I */ vdwioffset{I} = 2*nvdwtype*vdwtype[inr+{I}]; @@ -384,16 +367,10 @@ void /* #if KERNEL_ELEC != 'None' */ velecsum = _mm_setzero_ps(); /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC */ - vgbsum = _mm_setzero_ps(); - /* #endif */ /* #if KERNEL_VDW != 'None' */ vvdwsum = _mm_setzero_ps(); /* #endif */ /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */ - dvdasum = _mm_setzero_ps(); - /* #endif */ 
/* #for ROUND in ['Loop','Epilogue'] */ @@ -498,10 +475,6 @@ void /* #for J in PARTICLES_ELEC_J */ jq{J} = gmx_mm_load_4real_swizzle_ps(charge+jnrA+{J},charge+jnrB+{J}, charge+jnrC+{J},charge+jnrD+{J}); - /* #if KERNEL_ELEC=='GeneralizedBorn' */ - isaj{J} = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+{J},invsqrta+jnrB+{J}, - invsqrta+jnrC+{J},invsqrta+jnrD+{J}); - /* #endif */ /* #endfor */ /* #for J in PARTICLES_VDW_J */ vdwjidx{J}A = 2*vdwtype[jnrA+{J}]; @@ -611,62 +584,6 @@ void /* #define INNERFLOPS INNERFLOPS+3 */ /* #endif */ - /* #elif KERNEL_ELEC=='GeneralizedBorn' */ - - /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */ - isaprod = _mm_mul_ps(isai{I},isaj{J}); - gbqqfactor = _mm_xor_ps(signbit,_mm_mul_ps(qq{I}{J},_mm_mul_ps(isaprod,gbinvepsdiff))); - gbscale = _mm_mul_ps(isaprod,gbtabscale); - /* #define INNERFLOPS INNERFLOPS+5 */ - - /* Calculate generalized born table index - this is a separate table from the normal one, - * but we use the same procedure by multiplying r with scale and truncating to integer. 
- */ - rt = _mm_mul_ps(r{I}{J},gbscale); - gbitab = _mm_cvttps_epi32(rt); - gbeps = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR)); - gbitab = _mm_slli_epi32(gbitab,2); - Y = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) ); - F = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) ); - G = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) ); - H = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) ); - _MM_TRANSPOSE4_PS(Y,F,G,H); - Heps = _mm_mul_ps(gbeps,H); - Fp = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps))); - VV = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp)); - vgb = _mm_mul_ps(gbqqfactor,VV); - /* #define INNERFLOPS INNERFLOPS+10 */ - - /* #if 'Force' in KERNEL_VF */ - FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps)))); - fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale)); - dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r{I}{J}))); - /* #if ROUND == 'Epilogue' */ - dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp); - /* #endif */ - dvdasum = _mm_add_ps(dvdasum,dvdatmp); - /* #if ROUND == 'Loop' */ - fjptrA = dvda+jnrA; - fjptrB = dvda+jnrB; - fjptrC = dvda+jnrC; - fjptrD = dvda+jnrD; - /* #else */ - /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */ - fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch; - fjptrB = (jnrlistB>=0) ? dvda+jnrB : scratch; - fjptrC = (jnrlistC>=0) ? dvda+jnrC : scratch; - fjptrD = (jnrlistD>=0) ? 
dvda+jnrD : scratch; - /* #endif */ - gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj{J},isaj{J}))); - /* #define INNERFLOPS INNERFLOPS+13 */ - /* #endif */ - velec = _mm_mul_ps(qq{I}{J},rinv{I}{J}); - /* #define INNERFLOPS INNERFLOPS+1 */ - /* #if 'Force' in KERNEL_VF */ - felec = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv{I}{J}),fgb),rinv{I}{J}); - /* #define INNERFLOPS INNERFLOPS+3 */ - /* #endif */ - /* #elif KERNEL_ELEC=='Ewald' */ /* EWALD ELECTROSTATICS */ @@ -913,17 +830,6 @@ void /* #endif */ velecsum = _mm_add_ps(velecsum,velec); /* #define INNERFLOPS INNERFLOPS+1 */ - /* #if KERNEL_ELEC=='GeneralizedBorn' */ - /* #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */ - vgb = _mm_and_ps(vgb,cutoff_mask); - /* #define INNERFLOPS INNERFLOPS+1 */ - /* #endif */ - /* #if ROUND == 'Epilogue' */ - vgb = _mm_andnot_ps(dummy_mask,vgb); - /* #endif */ - vgbsum = _mm_add_ps(vgbsum,vgb); - /* #define INNERFLOPS INNERFLOPS+1 */ - /* #endif */ /* #endif */ /* #if 'vdw' in INTERACTION_FLAGS[I][J] */ /* ## Note special check for TIP4P-TIP4P. 
Since we are cutting of all hydrogen interactions we also cut the LJ-only O-O interaction */ @@ -1075,19 +981,11 @@ void gmx_mm_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid); /* #define OUTERFLOPS OUTERFLOPS+1 */ /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC */ - gmx_mm_update_1pot_ps(vgbsum,kernel_data->energygrp_polarization+ggid); - /* #define OUTERFLOPS OUTERFLOPS+1 */ - /* #endif */ /* #if KERNEL_VDW != 'None' */ gmx_mm_update_1pot_ps(vvdwsum,kernel_data->energygrp_vdw+ggid); /* #define OUTERFLOPS OUTERFLOPS+1 */ /* #endif */ /* #endif */ - /* #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */ - dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai{I},isai{I})); - gmx_mm_update_1pot_ps(dvdasum,dvda+inr); - /* #endif */ /* Increment number of inner iterations */ inneriter += j_index_end - j_index_start; diff --git a/src/gromacs/gmxlib/nonbonded/nonbonded.cpp b/src/gromacs/gmxlib/nonbonded/nonbonded.cpp index 2d683c34cb..aba9e8f735 100644 --- a/src/gromacs/gmxlib/nonbonded/nonbonded.cpp +++ b/src/gromacs/gmxlib/nonbonded/nonbonded.cpp @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2017, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -388,7 +388,6 @@ void do_nonbonded(t_forcerec *fr, } kernel_data.energygrp_elec = grppener->ener[egCOULSR]; kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? 
egBHAMSR : egLJSR]; - kernel_data.energygrp_polarization = grppener->ener[egGB]; nlist = nblists->nlist_sr; f = f_shortrange; } diff --git a/src/gromacs/gmxlib/nrnb.cpp b/src/gromacs/gmxlib/nrnb.cpp index 4bc3f67230..ba5ec1da26 100644 --- a/src/gromacs/gmxlib/nrnb.cpp +++ b/src/gromacs/gmxlib/nrnb.cpp @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2017, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -85,7 +85,6 @@ static const t_nrnb_data nbdata[eNRNB] = { { "NB Generic charge grp kernel", 1 }, { "NB Free energy kernel", 1 }, { "NB All-vs-all", 1 }, - { "NB All-vs-all, GB", 1 }, { "Pair Search distance check", 9 }, /* nbnxn pair dist. check */ /* nbnxn kernel flops are based on inner-loops without exclusion checks. @@ -120,12 +119,6 @@ static const t_nrnb_data nbdata[eNRNB] = { { "NxN LJ add LJ Ewald [F]", 36 }, /* extra cost for LJ Ewald */ { "NxN LJ add LJ Ewald [V&F]", 33 }, { "1,4 nonbonded interactions", 90 }, - { "Born radii (Still)", 47 }, - { "Born radii (HCT/OBC)", 183 }, - { "Born force chain rule", 15 }, - { "All-vs-All Still radii", 1 }, - { "All-vs-All HCT/OBC radii", 1 }, - { "All-vs-All Born chain rule", 1 }, { "Calc Weights", 36 }, { "Spread Q", 6 }, { "Spread Q Bspline", 2 }, @@ -189,7 +182,6 @@ static const t_nrnb_data nbdata[eNRNB] = { { "Virtual Site 4fd", 110 }, { "Virtual Site 4fdn", 254 }, { "Virtual Site N", 15 }, - { "Mixed Generalized Born stuff", 10 }, { "CMAP", 1700 }, // Estimate! 
{ "Urey-Bradley", 183 }, { "Cross-Bond-Bond", 163 }, @@ -334,7 +326,7 @@ void print_flop(FILE *out, t_nrnb *nrnb, double *nbfs, double *mflop) const char *myline = "-----------------------------------------------------------------------------"; *nbfs = 0.0; - for (i = 0; (i < eNR_NBKERNEL_ALLVSALLGB); i++) + for (i = 0; (i < eNR_NBKERNEL_TOTAL_NR); i++) { if (std::strstr(nbdata[i].name, "W3-W3") != nullptr) { @@ -574,7 +566,7 @@ void pr_load(FILE *log, t_commrec *cr, t_nrnb nrnb[]) { add_nrnb(av, av, &(nrnb[i])); /* Cost due to forces */ - for (j = 0; (j < eNR_NBKERNEL_ALLVSALLGB); j++) + for (j = 0; (j < eNR_NBKERNEL_TOTAL_NR); j++) { ftot[i] += nrnb[i].n[j]*cost_nrnb(j); } diff --git a/src/gromacs/gmxlib/nrnb.h b/src/gromacs/gmxlib/nrnb.h index 5cc1d8a7d5..53ee66765d 100644 --- a/src/gromacs/gmxlib/nrnb.h +++ b/src/gromacs/gmxlib/nrnb.h @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2017, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -75,8 +75,8 @@ enum eNR_NBKERNEL_GENERIC_CG, eNR_NBKERNEL_FREE_ENERGY, /* Add other generic kernels _before_ the free energy one */ - eNR_NBKERNEL_ALLVSALL, - eNR_NBKERNEL_ALLVSALLGB, + eNR_NBKERNEL_TOTAL_NR, + eNR_NBKERNEL_ALLVSALL = eNR_NBKERNEL_TOTAL_NR, // Reuse the symbolic constant that indicates the last kernel eNR_NBNXN_DIST2, eNR_NBNXN_LJ_RF, eNR_NBNXN_LJ_RF_E, @@ -90,10 +90,6 @@ enum eNR_NBNXN_ADD_LJ_PSW, eNR_NBNXN_ADD_LJ_PSW_E, eNR_NBNXN_ADD_LJ_EWALD, eNR_NBNXN_ADD_LJ_EWALD_E, eNR_NB14, - eNR_BORN_RADII_STILL, eNR_BORN_RADII_HCT_OBC, - eNR_BORN_CHAINRULE, - eNR_BORN_AVA_RADII_STILL, eNR_BORN_AVA_RADII_HCT_OBC, - eNR_BORN_AVA_CHAINRULE, eNR_WEIGHTS, eNR_SPREAD, eNR_SPREADBSP, eNR_GATHERF, eNR_GATHERFBSP, eNR_FFT, eNR_CONV, eNR_SOLVEPME, eNR_NS, eNR_RESETX, @@ -115,7 +111,7 @@ enum eNR_SHAKE_RIJ, eNR_CONSTR_VIR, eNR_SETTLE, eNR_VSITE2, eNR_VSITE3, eNR_VSITE3FD, eNR_VSITE3FAD, eNR_VSITE3OUT, eNR_VSITE4FD, - eNR_VSITE4FDN, eNR_VSITEN, eNR_GB, + eNR_VSITE4FDN, eNR_VSITEN, eNR_CMAP, eNR_UREY_BRADLEY, eNR_CROSS_BOND_BOND, eNR_CROSS_BOND_ANGLE, eNRNB diff --git a/src/gromacs/gmxpreprocess/convparm.cpp b/src/gromacs/gmxpreprocess/convparm.cpp index 0a2f123d11..619e55b826 100644 --- a/src/gromacs/gmxpreprocess/convparm.cpp +++ b/src/gromacs/gmxpreprocess/convparm.cpp @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -442,14 +442,9 @@ assign_param(t_functype ftype, t_iparams *newparam, newparam->cmap.cmapA = static_cast(old[0]); newparam->cmap.cmapB = static_cast(old[1]); break; - case F_GB12: - case F_GB13: - case F_GB14: - newparam->gb.sar = old[0]; - newparam->gb.st = old[1]; - newparam->gb.pi = old[2]; - newparam->gb.gbr = old[3]; - newparam->gb.bmlt = old[4]; + case F_GB12_NOLONGERUSED: + case F_GB13_NOLONGERUSED: + case F_GB14_NOLONGERUSED: break; default: gmx_fatal(FARGS, "unknown function type %d in %s line %d", @@ -478,26 +473,9 @@ static int enter_params(gmx_ffparams_t *ffparams, t_functype ftype, { if (ffparams->functype[type] == ftype) { - if (F_GB13 == ftype) + if (memcmp(&newparam, &ffparams->iparams[type], (size_t)sizeof(newparam)) == 0) { - /* Occasionally, the way the 1-3 reference distance is - * computed can lead to non-binary-identical results, but I - * don't know why. */ - if ((gmx_within_tol(newparam.gb.sar, ffparams->iparams[type].gb.sar, 1e-6)) && - (gmx_within_tol(newparam.gb.st, ffparams->iparams[type].gb.st, 1e-6)) && - (gmx_within_tol(newparam.gb.pi, ffparams->iparams[type].gb.pi, 1e-6)) && - (gmx_within_tol(newparam.gb.gbr, ffparams->iparams[type].gb.gbr, 1e-6)) && - (gmx_within_tol(newparam.gb.bmlt, ffparams->iparams[type].gb.bmlt, 1e-6))) - { - return type; - } - } - else - { - if (memcmp(&newparam, &ffparams->iparams[type], (size_t)sizeof(newparam)) == 0) - { - return type; - } + return type; } } } diff --git a/src/gromacs/gmxpreprocess/gpp_atomtype.cpp b/src/gromacs/gmxpreprocess/gpp_atomtype.cpp index 68538e8426..50f7d7f124 100644 --- a/src/gromacs/gmxpreprocess/gpp_atomtype.cpp +++ b/src/gromacs/gmxpreprocess/gpp_atomtype.cpp @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. 
- * Copyright (c) 2011,2014,2015,2017, by the GROMACS development team, led by + * Copyright (c) 2011,2014,2015,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -59,11 +59,6 @@ typedef struct gpp_atomtype { char ***atomname; /* Names of the atomtypes */ t_param *nb; /* Nonbonded force default params */ int *bondatomtype; /* The bond_atomtype for each atomtype */ - real *radius; /* Radius for GBSA stuff */ - real *vol; /* Effective volume for GBSA */ - real *surftens; /* Surface tension with water, for GBSA */ - real *gb_radius; /* Radius for Still model */ - real *S_hct; /* Overlap factor for HCT model */ int *atomnumber; /* Atomic number, used for QM/MM */ } t_gpp_atomtype; @@ -168,56 +163,6 @@ int get_atomtype_atomnumber(int nt, gpp_atomtype_t ga) return ga->atomnumber[nt]; } -real get_atomtype_radius(int nt, gpp_atomtype_t ga) -{ - if ((nt < 0) || (nt >= ga->nr)) - { - return NOTSET; - } - - return ga->radius[nt]; -} - -real get_atomtype_vol(int nt, gpp_atomtype_t ga) -{ - if ((nt < 0) || (nt >= ga->nr)) - { - return NOTSET; - } - - return ga->vol[nt]; -} - -real get_atomtype_surftens(int nt, gpp_atomtype_t ga) -{ - if ((nt < 0) || (nt >= ga->nr)) - { - return NOTSET; - } - - return ga->surftens[nt]; -} - -real get_atomtype_gb_radius(int nt, gpp_atomtype_t ga) -{ - if ((nt < 0) || (nt >= ga->nr)) - { - return NOTSET; - } - - return ga->gb_radius[nt]; -} - -real get_atomtype_S_hct(int nt, gpp_atomtype_t ga) -{ - if ((nt < 0) || (nt >= ga->nr)) - { - return NOTSET; - } - - return ga->S_hct[nt]; -} - real get_atomtype_nbparam(int nt, int param, gpp_atomtype_t ga) { if ((nt < 0) || (nt >= ga->nr)) @@ -242,41 +187,14 @@ gpp_atomtype_t init_atomtype(void) ga->atomname = nullptr; ga->nb = nullptr; ga->bondatomtype = nullptr; - ga->radius = nullptr; - ga->vol = nullptr; - 
ga->surftens = nullptr; ga->atomnumber = nullptr; - ga->gb_radius = nullptr; - ga->S_hct = nullptr; return ga; } -int -set_atomtype_gbparam(gpp_atomtype_t ga, int i, - real radius, real vol, real surftens, - real gb_radius, real S_hct) -{ - if ( (i < 0) || (i >= ga->nr)) - { - return NOTSET; - } - - ga->radius[i] = radius; - ga->vol[i] = vol; - ga->surftens[i] = surftens; - ga->gb_radius[i] = gb_radius; - ga->S_hct[i] = S_hct; - - return i; -} - - int set_atomtype(int nt, gpp_atomtype_t ga, t_symtab *tab, t_atom *a, const char *name, t_param *nb, - int bondatomtype, - real radius, real vol, real surftens, int atomnumber, - real gb_radius, real S_hct) + int bondatomtype, int atomnumber) { if ((nt < 0) || (nt >= ga->nr)) { @@ -287,21 +205,14 @@ int set_atomtype(int nt, gpp_atomtype_t ga, t_symtab *tab, ga->atomname[nt] = put_symtab(tab, name); ga->nb[nt] = *nb; ga->bondatomtype[nt] = bondatomtype; - ga->radius[nt] = radius; - ga->vol[nt] = vol; - ga->surftens[nt] = surftens; ga->atomnumber[nt] = atomnumber; - ga->gb_radius[nt] = gb_radius; - ga->S_hct[nt] = S_hct; return nt; } int add_atomtype(gpp_atomtype_t ga, t_symtab *tab, t_atom *a, const char *name, t_param *nb, - int bondatomtype, - real radius, real vol, real surftens, int atomnumber, - real gb_radius, real S_hct) + int bondatomtype, int atomnumber) { int i; @@ -323,15 +234,9 @@ int add_atomtype(gpp_atomtype_t ga, t_symtab *tab, srenew(ga->atomname, ga->nr); srenew(ga->nb, ga->nr); srenew(ga->bondatomtype, ga->nr); - srenew(ga->radius, ga->nr); - srenew(ga->vol, ga->nr); - srenew(ga->surftens, ga->nr); srenew(ga->atomnumber, ga->nr); - srenew(ga->gb_radius, ga->nr); - srenew(ga->S_hct, ga->nr); - return set_atomtype(ga->nr-1, ga, tab, a, name, nb, bondatomtype, radius, - vol, surftens, atomnumber, gb_radius, S_hct); + return set_atomtype(ga->nr-1, ga, tab, a, name, nb, bondatomtype, atomnumber); } else { @@ -364,11 +269,6 @@ void done_atomtype(gpp_atomtype_t ga) sfree(ga->atomname); sfree(ga->nb); 
sfree(ga->bondatomtype); - sfree(ga->radius); - sfree(ga->vol); - sfree(ga->gb_radius); - sfree(ga->S_hct); - sfree(ga->surftens); sfree(ga->atomnumber); ga->nr = 0; sfree(ga); @@ -404,15 +304,10 @@ static int search_atomtypes(gpp_atomtype_t ga, int *n, int typelist[], bFound = (param[ntype*typelist[i]+j].c[k] == param[ntype*thistype+j].c[k]); } - /* Check radius, volume, surftens */ + /* Check atomnumber */ tli = typelist[i]; bFound = bFound && - (get_atomtype_radius(tli, ga) == get_atomtype_radius(thistype, ga)) && - (get_atomtype_vol(tli, ga) == get_atomtype_vol(thistype, ga)) && - (get_atomtype_surftens(tli, ga) == get_atomtype_surftens(thistype, ga)) && - (get_atomtype_atomnumber(tli, ga) == get_atomtype_atomnumber(thistype, ga)) && - (get_atomtype_gb_radius(tli, ga) == get_atomtype_gb_radius(thistype, ga)) && - (get_atomtype_S_hct(tli, ga) == get_atomtype_S_hct(thistype, ga)); + (get_atomtype_atomnumber(tli, ga) == get_atomtype_atomnumber(thistype, ga)); } if (bFound) { @@ -446,11 +341,6 @@ void renum_atype(t_params plist[], gmx_mtop_t *mtop, t_atoms *atoms; t_param *nbsnew; int *typelist; - real *new_radius; - real *new_vol; - real *new_surftens; - real *new_gb_radius; - real *new_S_hct; int *new_atomnumber; char ***new_atomname; @@ -467,10 +357,6 @@ void renum_atype(t_params plist[], gmx_mtop_t *mtop, * ones with identical nonbonded interactions, in addition * to removing unused ones. * - * With Generalized-Born electrostatics, or implicit solvent - * we also check that the atomtype radius, effective_volume - * and surface tension match. 
- * * With QM/MM we also check that the atom numbers match */ @@ -512,12 +398,7 @@ void renum_atype(t_params plist[], gmx_mtop_t *mtop, } } - snew(new_radius, nat); - snew(new_vol, nat); - snew(new_surftens, nat); snew(new_atomnumber, nat); - snew(new_gb_radius, nat); - snew(new_S_hct, nat); snew(new_atomname, nat); /* We now have a list of unique atomtypes in typelist */ @@ -544,12 +425,7 @@ void renum_atype(t_params plist[], gmx_mtop_t *mtop, nbsnew[k].c[l] = plist[ftype].param[ntype*mi+mj].c[l]; } } - new_radius[i] = get_atomtype_radius(mi, ga); - new_vol[i] = get_atomtype_vol(mi, ga); - new_surftens[i] = get_atomtype_surftens(mi, ga); new_atomnumber[i] = get_atomtype_atomnumber(mi, ga); - new_gb_radius[i] = get_atomtype_gb_radius(mi, ga); - new_S_hct[i] = get_atomtype_S_hct(mi, ga); new_atomname[i] = ga->atomname[mi]; } @@ -563,21 +439,11 @@ void renum_atype(t_params plist[], gmx_mtop_t *mtop, plist[ftype].nr = i; mtop->ffparams.atnr = nat; - sfree(ga->radius); - sfree(ga->vol); - sfree(ga->surftens); sfree(ga->atomnumber); - sfree(ga->gb_radius); - sfree(ga->S_hct); /* Dangling atomname pointers ? 
*/ sfree(ga->atomname); - ga->radius = new_radius; - ga->vol = new_vol; - ga->surftens = new_surftens; ga->atomnumber = new_atomnumber; - ga->gb_radius = new_gb_radius; - ga->S_hct = new_S_hct; ga->atomname = new_atomname; ga->nr = nat; @@ -593,20 +459,10 @@ void copy_atomtype_atomtypes(gpp_atomtype_t ga, t_atomtypes *atomtypes) /* Copy the atomtype data to the topology atomtype list */ ntype = get_atomtype_ntypes(ga); atomtypes->nr = ntype; - snew(atomtypes->radius, ntype); - snew(atomtypes->vol, ntype); - snew(atomtypes->surftens, ntype); snew(atomtypes->atomnumber, ntype); - snew(atomtypes->gb_radius, ntype); - snew(atomtypes->S_hct, ntype); for (i = 0; i < ntype; i++) { - atomtypes->radius[i] = ga->radius[i]; - atomtypes->vol[i] = ga->vol[i]; - atomtypes->surftens[i] = ga->surftens[i]; atomtypes->atomnumber[i] = ga->atomnumber[i]; - atomtypes->gb_radius[i] = ga->gb_radius[i]; - atomtypes->S_hct[i] = ga->S_hct[i]; } } diff --git a/src/gromacs/gmxpreprocess/gpp_atomtype.h b/src/gromacs/gmxpreprocess/gpp_atomtype.h index 8197540305..66edce1ee2 100644 --- a/src/gromacs/gmxpreprocess/gpp_atomtype.h +++ b/src/gromacs/gmxpreprocess/gpp_atomtype.h @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2011,2014,2015, by the GROMACS development team, led by + * Copyright (c) 2011,2014,2015,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -60,11 +60,6 @@ real get_atomtype_massA(int nt, gpp_atomtype_t at); real get_atomtype_massB(int nt, gpp_atomtype_t at); real get_atomtype_qA(int nt, gpp_atomtype_t at); real get_atomtype_qB(int nt, gpp_atomtype_t at); -real get_atomtype_radius(int nt, gpp_atomtype_t at); -real get_atomtype_vol(int nt, gpp_atomtype_t at); -real get_atomtype_surftens(int nt, gpp_atomtype_t at); -real get_atomtype_gb_radius(int nt, gpp_atomtype_t at); -real get_atomtype_S_hct(int nt, gpp_atomtype_t at); int get_atomtype_ptype(int nt, gpp_atomtype_t at); int get_atomtype_batype(int nt, gpp_atomtype_t at); int get_atomtype_atomnumber(int nt, gpp_atomtype_t at); @@ -82,22 +77,13 @@ void done_atomtype(gpp_atomtype_t at); int set_atomtype(int nt, gpp_atomtype_t at, struct t_symtab *tab, t_atom *a, const char *name, t_param *nb, - int bondatomtype, - real radius, real vol, real surftens, int atomnumber, - real gb_radius, real S_hct); + int bondatomtype, int atomnumber); /* Set the values of an existing atom type nt. Returns nt on success or NOTSET on error. */ -int -set_atomtype_gbparam(gpp_atomtype_t at, int i, - real radius, real vol, real surftens, - real gb_radius, real S_hct); - int add_atomtype(gpp_atomtype_t at, struct t_symtab *tab, t_atom *a, const char *name, t_param *nb, - int bondatomtype, - real radius, real vol, real surftens, int atomnumber, - real gb_radius, real S_hct); + int bondatomtype, int atomnumber); /* Add a complete new atom type to an existing atomtype structure. Returns the number of the atom type. 
*/ diff --git a/src/gromacs/gmxpreprocess/grompp.cpp b/src/gromacs/gmxpreprocess/grompp.cpp index 30cbe16288..2133aa7194 100644 --- a/src/gromacs/gmxpreprocess/grompp.cpp +++ b/src/gromacs/gmxpreprocess/grompp.cpp @@ -77,7 +77,6 @@ #include "gromacs/mdlib/calc_verletbuf.h" #include "gromacs/mdlib/compute_io.h" #include "gromacs/mdlib/constr.h" -#include "gromacs/mdlib/genborn.h" #include "gromacs/mdlib/perf_est.h" #include "gromacs/mdlib/sim_util.h" #include "gromacs/mdrunutility/mdmodules.h" @@ -520,23 +519,16 @@ new_status(const char *topfile, const char *topppfile, const char *confin, gmx_molblock_t *molblock, *molbs; int mb, i, nrmols, nmismatch; char buf[STRLEN]; - gmx_bool bGB = FALSE; char warn_buf[STRLEN]; init_mtop(sys); - /* Set gmx_boolean for GB */ - if (ir->implicit_solvent) - { - bGB = TRUE; - } - /* TOPOLOGY processing */ sys->name = do_top(bVerbose, topfile, topppfile, opts, bZero, &(sys->symtab), plist, comb, reppow, fudgeQQ, atype, &nrmols, &molinfo, intermolecular_interactions, ir, - &nmolblock, &molblock, bGB, + &nmolblock, &molblock, wi); sys->nmolblock = 0; @@ -1270,73 +1262,6 @@ static int count_constraints(gmx_mtop_t *mtop, t_molinfo *mi, warninp_t wi) return count; } -static void check_gbsa_params_charged(gmx_mtop_t *sys, gpp_atomtype_t atype) -{ - int i, nmiss, natoms, mt; - real q; - const t_atoms *atoms; - - nmiss = 0; - for (mt = 0; mt < sys->nmoltype; mt++) - { - atoms = &sys->moltype[mt].atoms; - natoms = atoms->nr; - - for (i = 0; i < natoms; i++) - { - q = atoms->atom[i].q; - if ((get_atomtype_radius(atoms->atom[i].type, atype) == 0 || - get_atomtype_vol(atoms->atom[i].type, atype) == 0 || - get_atomtype_surftens(atoms->atom[i].type, atype) == 0 || - get_atomtype_gb_radius(atoms->atom[i].type, atype) == 0 || - get_atomtype_S_hct(atoms->atom[i].type, atype) == 0) && - q != 0) - { - fprintf(stderr, "\nGB parameter(s) zero for atom type '%s' while charge is %g\n", - get_atomtype_name(atoms->atom[i].type, atype), q); - nmiss++; - } - } 
- } - - if (nmiss > 0) - { - gmx_fatal(FARGS, "Can't do GB electrostatics; the implicit_genborn_params section of the forcefield has parameters with value zero for %d atomtypes that occur as charged atoms.", nmiss); - } -} - - -static void check_gbsa_params(gpp_atomtype_t atype) -{ - int nmiss, i; - - /* If we are doing GBSA, check that we got the parameters we need - * This checking is to see if there are GBSA paratmeters for all - * atoms in the force field. To go around this for testing purposes - * comment out the nerror++ counter temporarily - */ - nmiss = 0; - for (i = 0; i < get_atomtype_ntypes(atype); i++) - { - if (get_atomtype_radius(i, atype) < 0 || - get_atomtype_vol(i, atype) < 0 || - get_atomtype_surftens(i, atype) < 0 || - get_atomtype_gb_radius(i, atype) < 0 || - get_atomtype_S_hct(i, atype) < 0) - { - fprintf(stderr, "\nGB parameter(s) missing or negative for atom type '%s'\n", - get_atomtype_name(i, atype)); - nmiss++; - } - } - - if (nmiss > 0) - { - gmx_fatal(FARGS, "Can't do GB electrostatics; the implicit_genborn_params section of the forcefield is missing parameters for %d atomtypes or they might be negative.", nmiss); - } - -} - static real calc_temp(const gmx_mtop_t *mtop, const t_inputrec *ir, rvec *v) @@ -2061,15 +1986,9 @@ int gmx_grompp(int argc, char *argv[]) get_atomtype_ntypes(atype); } - if (ir->implicit_solvent != eisNO) + if (ir->implicit_solvent) { - /* Now we have renumbered the atom types, we can check the GBSA params */ - check_gbsa_params(atype); - - /* Check that all atoms that have charge and/or LJ-parameters also have - * sensible GB-parameters - */ - check_gbsa_params_charged(sys, atype); + gmx_fatal(FARGS, "Implicit solvation is no longer supported"); } /* PELA: Copy the atomtype data to the topology atomtype list */ diff --git a/src/gromacs/gmxpreprocess/nm2type.cpp b/src/gromacs/gmxpreprocess/nm2type.cpp index 25f770f7f0..3b7d145a8c 100644 --- a/src/gromacs/gmxpreprocess/nm2type.cpp +++ 
b/src/gromacs/gmxpreprocess/nm2type.cpp @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2008, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -343,7 +343,7 @@ int nm2type(int nnm, t_nm2type nm2t[], struct t_symtab *tab, t_atoms *atoms, atoms->atom[i].qB = alpha; atoms->atom[i].m = atoms->atom[i].mB = mm; k = add_atomtype(atype, tab, &(atoms->atom[i]), type, param, - atoms->atom[i].type, 0, 0, 0, atomnr, 0, 0); + atoms->atom[i].type, atomnr); } atoms->atom[i].type = k; atoms->atom[i].typeB = k; diff --git a/src/gromacs/gmxpreprocess/readir.cpp b/src/gromacs/gmxpreprocess/readir.cpp index eab0056b10..8286170b7d 100644 --- a/src/gromacs/gmxpreprocess/readir.cpp +++ b/src/gromacs/gmxpreprocess/readir.cpp @@ -370,11 +370,6 @@ void check_ir(const char *mdparin, t_inputrec *ir, t_gromppopts *opts, warning_error(wi, warn_buf); } - if (ir->implicit_solvent != eisNO) - { - warning_error(wi, "Implicit solvent is not (yet) supported with the with Verlet lists."); - } - if (EEL_USER(ir->coulombtype)) { sprintf(warn_buf, "Coulomb type %s is not supported with the verlet scheme", eel_names[ir->coulombtype]); @@ -1050,12 +1045,6 @@ void check_ir(const char *mdparin, t_inputrec *ir, t_gromppopts *opts, warning(wi, warn_buf); } - if (ir->epsilon_r != 1 && ir->implicit_solvent == eisGBSA) - { - sprintf(warn_buf, "epsilon-r = %g with GB implicit solvent, will use this value for inner dielectric", ir->epsilon_r); - warning_note(wi, warn_buf); - } - if (EEL_RF(ir->coulombtype) && ir->epsilon_rf == 1 && ir->epsilon_r != 1) { sprintf(warn_buf, "epsilon-r = %g and epsilon-rf = 1 with 
reaction field, proceeding assuming old format and exchanging epsilon-r and epsilon-rf", ir->epsilon_r); @@ -1067,9 +1056,9 @@ void check_ir(const char *mdparin, t_inputrec *ir, t_gromppopts *opts, if (ir->epsilon_r == 0) { sprintf(err_buf, - "It is pointless to use long-range or Generalized Born electrostatics with infinite relative permittivity." + "It is pointless to use long-range electrostatics with infinite relative permittivity." "Since you are effectively turning of electrostatics, a plain cutoff will be much faster."); - CHECK(EEL_FULL(ir->coulombtype) || ir->implicit_solvent == eisGBSA); + CHECK(EEL_FULL(ir->coulombtype)); } if (getenv("GMX_DO_GALACTIC_DYNAMICS") == nullptr) @@ -1333,60 +1322,6 @@ void check_ir(const char *mdparin, t_inputrec *ir, t_gromppopts *opts, warning_error(wi, warn_buf); } - if (ir->sa_algorithm == esaSTILL) - { - sprintf(err_buf, "Still SA algorithm not available yet, use %s or %s instead\n", esa_names[esaAPPROX], esa_names[esaNO]); - CHECK(ir->sa_algorithm == esaSTILL); - } - - if (ir->implicit_solvent == eisGBSA) - { - sprintf(err_buf, "With GBSA implicit solvent, rgbradii must be equal to rlist."); - CHECK(ir->rgbradii != ir->rlist); - - if (ir->coulombtype != eelCUT) - { - sprintf(err_buf, "With GBSA, coulombtype must be equal to %s\n", eel_names[eelCUT]); - CHECK(ir->coulombtype != eelCUT); - } - if (ir->vdwtype != evdwCUT) - { - sprintf(err_buf, "With GBSA, vdw-type must be equal to %s\n", evdw_names[evdwCUT]); - CHECK(ir->vdwtype != evdwCUT); - } - if (ir->nstgbradii < 1) - { - sprintf(warn_buf, "Using GBSA with nstgbradii<1, setting nstgbradii=1"); - warning_note(wi, warn_buf); - ir->nstgbradii = 1; - } - if (ir->sa_algorithm == esaNO) - { - sprintf(warn_buf, "No SA (non-polar) calculation requested together with GB. Are you sure this is what you want?\n"); - warning_note(wi, warn_buf); - } - if (ir->sa_surface_tension < 0 && ir->sa_algorithm != esaNO) - { - sprintf(warn_buf, "Value of sa_surface_tension is < 0. 
Changing it to 2.05016 or 2.25936 kJ/nm^2/mol for Still and HCT/OBC respectively\n"); - warning_note(wi, warn_buf); - - if (ir->gb_algorithm == egbSTILL) - { - ir->sa_surface_tension = 0.0049 * CAL2JOULE * 100; - } - else - { - ir->sa_surface_tension = 0.0054 * CAL2JOULE * 100; - } - } - if (ir->sa_surface_tension == 0 && ir->sa_algorithm != esaNO) - { - sprintf(err_buf, "Surface tension set to 0 while SA-calculation requested\n"); - CHECK(ir->sa_surface_tension == 0 && ir->sa_algorithm != esaNO); - } - - } - if (ir->bQMMM) { if (ir->cutoff_scheme != ecutsGROUP) @@ -1834,6 +1769,8 @@ void get_ir(const char *mdparin, const char *mdparout, t_lambda *fep = ir->fepvals; t_expanded *expand = ir->expandedvals; + const char *no_names[] = { "no", nullptr }; + init_inputrec_strings(); gmx::TextInputFile stream(mdparin); inp = read_inpfile(&stream, mdparin, &ninp, wi); @@ -1878,6 +1815,17 @@ void get_ir(const char *mdparin, const char *mdparout, REM_TYPE("rlistlong"); REM_TYPE("nstcalclr"); REM_TYPE("pull-print-com2"); + REM_TYPE("gb-algorithm"); + REM_TYPE("nstgbradii"); + REM_TYPE("rgbradii"); + REM_TYPE("gb-epsilon-solvent"); + REM_TYPE("gb-saltconc"); + REM_TYPE("gb-obc-alpha"); + REM_TYPE("gb-obc-beta"); + REM_TYPE("gb-obc-gamma"); + REM_TYPE("gb-dielectric-offset"); + REM_TYPE("sa-algorithm"); + REM_TYPE("sa-surface-tension"); /* replace the following commands with the clearer new versions*/ REPL_TYPE("unconstrained-start", "continuation"); @@ -2008,30 +1956,10 @@ void get_ir(const char *mdparin, const char *mdparout, EETYPE("ewald-geometry", ir->ewald_geometry, eewg_names); RTYPE ("epsilon-surface", ir->epsilon_surface, 0.0); - CCTYPE("IMPLICIT SOLVENT ALGORITHM"); - EETYPE("implicit-solvent", ir->implicit_solvent, eis_names); - - CCTYPE ("GENERALIZED BORN ELECTROSTATICS"); - CTYPE ("Algorithm for calculating Born radii"); - EETYPE("gb-algorithm", ir->gb_algorithm, egb_names); - CTYPE ("Frequency of calculating the Born radii inside rlist"); - ITYPE ("nstgbradii", 
ir->nstgbradii, 1); - CTYPE ("Cutoff for Born radii calculation; the contribution from atoms"); - CTYPE ("between rlist and rgbradii is updated every nstlist steps"); - RTYPE ("rgbradii", ir->rgbradii, 1.0); - CTYPE ("Dielectric coefficient of the implicit solvent"); - RTYPE ("gb-epsilon-solvent", ir->gb_epsilon_solvent, 80.0); - CTYPE ("Salt concentration in M for Generalized Born models"); - RTYPE ("gb-saltconc", ir->gb_saltconc, 0.0); - CTYPE ("Scaling factors used in the OBC GB model. Default values are OBC(II)"); - RTYPE ("gb-obc-alpha", ir->gb_obc_alpha, 1.0); - RTYPE ("gb-obc-beta", ir->gb_obc_beta, 0.8); - RTYPE ("gb-obc-gamma", ir->gb_obc_gamma, 4.85); - RTYPE ("gb-dielectric-offset", ir->gb_dielectric_offset, 0.009); - EETYPE("sa-algorithm", ir->sa_algorithm, esa_names); - CTYPE ("Surface tension (kJ/mol/nm^2) for the SA (nonpolar surface) part of GBSA"); - CTYPE ("The value -1 will set default value for Still/HCT/OBC GB-models."); - RTYPE ("sa-surface-tension", ir->sa_surface_tension, -1); + /* Implicit solvation is no longer supported, but we need grompp + to be able to refuse old .mdp files that would have built a tpr + to run it. Thus, only "no" is accepted. */ + EETYPE("implicit-solvent", ir->implicit_solvent, no_names); /* Coupling stuff */ CCTYPE ("OPTIONS FOR WEAK COUPLING ALGORITHMS"); @@ -2362,8 +2290,9 @@ void get_ir(const char *mdparin, const char *mdparout, RTYPE("threshold", ir->swap->threshold, 1.0); } - /* AdResS is no longer supported, but we need mdrun to be able to refuse to run old AdResS .tpr files */ - EETYPE("adress", ir->bAdress, yesno_names); + /* AdResS is no longer supported, but we need grompp to be able to + refuse to process old .mdp files that used it. 
*/ + EETYPE("adress", ir->bAdress, no_names); /* User defined thingies */ CCTYPE ("User defined thingies"); @@ -4228,7 +4157,7 @@ void triple_check(const char *mdparin, t_inputrec *ir, gmx_mtop_t *sys, } else { - if (ir->coulombtype == eelCUT && ir->rcoulomb > 0 && !ir->implicit_solvent) + if (ir->coulombtype == eelCUT && ir->rcoulomb > 0) { sprintf(err_buf, "You are using a plain Coulomb cut-off, which might produce artifacts.\n" diff --git a/src/gromacs/gmxpreprocess/resall.cpp b/src/gromacs/gmxpreprocess/resall.cpp index 689eae682f..4304dbda00 100644 --- a/src/gromacs/gmxpreprocess/resall.cpp +++ b/src/gromacs/gmxpreprocess/resall.cpp @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -90,7 +90,7 @@ gpp_atomtype_t read_atype(const char *ffdir, t_symtab *tab) if (sscanf(buf, "%s%lf", name, &m) == 2) { a->m = m; - add_atomtype(at, tab, a, name, nb, 0, 0.0, 0.0, 0.0, 0, 0.0, 0.0 ); + add_atomtype(at, tab, a, name, nb, 0, 0); fprintf(stderr, "\rAtomtype %d", ++nratt); fflush(stderr); } diff --git a/src/gromacs/gmxpreprocess/tests/readir.cpp b/src/gromacs/gmxpreprocess/tests/readir.cpp index 6fd6f7d274..6500dd0beb 100644 --- a/src/gromacs/gmxpreprocess/tests/readir.cpp +++ b/src/gromacs/gmxpreprocess/tests/readir.cpp @@ -189,5 +189,17 @@ TEST_F(GetIrTest, TerminatesOnDuplicateOldAndNewKeys) EXPECT_DEATH(runTest(joinStrings(inputMdpFile, "\n")), "A parameter is present with both"); } +TEST_F(GetIrTest, ImplicitSolventNoWorks) +{ + const char *inputMdpFile = "implicit-solvent = no"; + runTest(inputMdpFile); +} + +TEST_F(GetIrTest, ImplicitSolventYesWorks) +{ + const char *inputMdpFile = "implicit-solvent = yes"; + EXPECT_DEATH(runTest(inputMdpFile), "Invalid enum"); +} + } // namespace } // namespace diff --git a/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_EmptyInputWorks.xml b/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_EmptyInputWorks.xml index 8d2b356f90..c0edc1f5e2 100644 --- a/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_EmptyInputWorks.xml +++ b/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_EmptyInputWorks.xml @@ -117,31 +117,7 @@ ewald-rtol-lj = 0.001 lj-pme-comb-rule = Geometric ewald-geometry = 3d epsilon-surface = 0 - -; IMPLICIT SOLVENT ALGORITHM -implicit-solvent = No - -; GENERALIZED BORN ELECTROSTATICS -; Algorithm for calculating Born radii -gb-algorithm = Still -; Frequency of calculating the Born radii inside rlist -nstgbradii = 1 -; Cutoff for Born radii calculation; the contribution from atoms -; between rlist and rgbradii is updated every nstlist steps -rgbradii = 1 -; Dielectric coefficient of the implicit solvent -gb-epsilon-solvent = 80 -; Salt concentration in M for Generalized Born models -gb-saltconc = 
0 -; Scaling factors used in the OBC GB model. Default values are OBC(II) -gb-obc-alpha = 1 -gb-obc-beta = 0.8 -gb-obc-gamma = 4.85 -gb-dielectric-offset = 0.009 -sa-algorithm = Ace-approximation -; Surface tension (kJ/mol/nm^2) for the SA (nonpolar surface) part of GBSA -; The value -1 will set default value for Still/HCT/OBC GB-models. -sa-surface-tension = -1 +implicit-solvent = no ; OPTIONS FOR WEAK COUPLING ALGORITHMS ; Temperature coupling diff --git a/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_HandlesDifferentKindsOfMdpLines.xml b/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_HandlesDifferentKindsOfMdpLines.xml index 27684c38d6..538864d80b 100644 --- a/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_HandlesDifferentKindsOfMdpLines.xml +++ b/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_HandlesDifferentKindsOfMdpLines.xml @@ -117,31 +117,7 @@ ewald-rtol-lj = 0.001 lj-pme-comb-rule = Geometric ewald-geometry = 3d epsilon-surface = 0 - -; IMPLICIT SOLVENT ALGORITHM -implicit-solvent = No - -; GENERALIZED BORN ELECTROSTATICS -; Algorithm for calculating Born radii -gb-algorithm = Still -; Frequency of calculating the Born radii inside rlist -nstgbradii = 1 -; Cutoff for Born radii calculation; the contribution from atoms -; between rlist and rgbradii is updated every nstlist steps -rgbradii = 1 -; Dielectric coefficient of the implicit solvent -gb-epsilon-solvent = 80 -; Salt concentration in M for Generalized Born models -gb-saltconc = 0 -; Scaling factors used in the OBC GB model. Default values are OBC(II) -gb-obc-alpha = 1 -gb-obc-beta = 0.8 -gb-obc-gamma = 4.85 -gb-dielectric-offset = 0.009 -sa-algorithm = Ace-approximation -; Surface tension (kJ/mol/nm^2) for the SA (nonpolar surface) part of GBSA -; The value -1 will set default value for Still/HCT/OBC GB-models. 
-sa-surface-tension = -1 +implicit-solvent = no ; OPTIONS FOR WEAK COUPLING ALGORITHMS ; Temperature coupling diff --git a/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_HandlesOnlyCutoffScheme.xml b/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_HandlesOnlyCutoffScheme.xml index 0d8780d26b..cc6a71bdc6 100644 --- a/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_HandlesOnlyCutoffScheme.xml +++ b/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_HandlesOnlyCutoffScheme.xml @@ -117,31 +117,7 @@ ewald-rtol-lj = 0.001 lj-pme-comb-rule = Geometric ewald-geometry = 3d epsilon-surface = 0 - -; IMPLICIT SOLVENT ALGORITHM -implicit-solvent = No - -; GENERALIZED BORN ELECTROSTATICS -; Algorithm for calculating Born radii -gb-algorithm = Still -; Frequency of calculating the Born radii inside rlist -nstgbradii = 1 -; Cutoff for Born radii calculation; the contribution from atoms -; between rlist and rgbradii is updated every nstlist steps -rgbradii = 1 -; Dielectric coefficient of the implicit solvent -gb-epsilon-solvent = 80 -; Salt concentration in M for Generalized Born models -gb-saltconc = 0 -; Scaling factors used in the OBC GB model. Default values are OBC(II) -gb-obc-alpha = 1 -gb-obc-beta = 0.8 -gb-obc-gamma = 4.85 -gb-dielectric-offset = 0.009 -sa-algorithm = Ace-approximation -; Surface tension (kJ/mol/nm^2) for the SA (nonpolar surface) part of GBSA -; The value -1 will set default value for Still/HCT/OBC GB-models. -sa-surface-tension = -1 +implicit-solvent = no ; OPTIONS FOR WEAK COUPLING ALGORITHMS ; Temperature coupling diff --git a/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_ImplicitSolventNoWorks.xml b/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_ImplicitSolventNoWorks.xml new file mode 100644 index 0000000000..c0edc1f5e2 --- /dev/null +++ b/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_ImplicitSolventNoWorks.xml @@ -0,0 +1,321 @@ + + + + false + +; VARIOUS PREPROCESSING OPTIONS +; Preprocessor information: use cpp syntax. 
+; e.g.: -I/home/joe/doe -I/home/mary/roe +include = +; e.g.: -DPOSRES -DFLEXIBLE (note these variable names are case sensitive) +define = + +; RUN CONTROL PARAMETERS +integrator = md +; Start time and timestep in ps +tinit = 0 +dt = 0.001 +nsteps = 0 +; For exact run continuation or redoing part of a run +init-step = 0 +; Part index is updated automatically on checkpointing (keeps files separate) +simulation-part = 1 +; mode for center of mass motion removal +comm-mode = Linear +; number of steps for center of mass motion removal +nstcomm = 100 +; group(s) for center of mass motion removal +comm-grps = + +; LANGEVIN DYNAMICS OPTIONS +; Friction coefficient (amu/ps) and random seed +bd-fric = 0 +ld-seed = -1 + +; ENERGY MINIMIZATION OPTIONS +; Force tolerance and initial step-size +emtol = 10 +emstep = 0.01 +; Max number of iterations in relax-shells +niter = 20 +; Step size (ps^2) for minimization of flexible constraints +fcstep = 0 +; Frequency of steepest descents steps when doing CG +nstcgsteep = 1000 +nbfgscorr = 10 + +; TEST PARTICLE INSERTION OPTIONS +rtpi = 0.05 + +; OUTPUT CONTROL OPTIONS +; Output frequency for coords (x), velocities (v) and forces (f) +nstxout = 0 +nstvout = 0 +nstfout = 0 +; Output frequency for energies to log file and energy file +nstlog = 1000 +nstcalcenergy = 100 +nstenergy = 1000 +; Output frequency and precision for .xtc file +nstxout-compressed = 0 +compressed-x-precision = 1000 +; This selects the subset of atoms for the compressed +; trajectory file. You can select multiple groups. By +; default, all atoms will be written. 
+compressed-x-grps = +; Selection of energy groups +energygrps = + +; NEIGHBORSEARCHING PARAMETERS +; cut-off scheme (Verlet: particle based cut-offs, group: using charge groups) +cutoff-scheme = Verlet +; nblist update frequency +nstlist = 10 +; ns algorithm (simple or grid) +ns-type = Grid +; Periodic boundary conditions: xyz, no, xy +pbc = xyz +periodic-molecules = no +; Allowed energy error due to the Verlet buffer in kJ/mol/ps per atom, +; a value of -1 means: use rlist +verlet-buffer-tolerance = 0.005 +; nblist cut-off +rlist = 1 +; long-range cut-off for switched potentials + +; OPTIONS FOR ELECTROSTATICS AND VDW +; Method for doing electrostatics +coulombtype = Cut-off +coulomb-modifier = Potential-shift-Verlet +rcoulomb-switch = 0 +rcoulomb = 1 +; Relative dielectric constant for the medium and the reaction field +epsilon-r = 1 +epsilon-rf = 0 +; Method for doing Van der Waals +vdw-type = Cut-off +vdw-modifier = Potential-shift-Verlet +; cut-off lengths +rvdw-switch = 0 +rvdw = 1 +; Apply long range dispersion corrections for Energy and Pressure +DispCorr = No +; Extension of the potential lookup tables beyond the cut-off +table-extension = 1 +; Separate tables between energy group pairs +energygrp-table = +; Spacing for the PME/PPPM FFT grid +fourierspacing = 0.12 +; FFT grid size, when a value is 0 fourierspacing will be used +fourier-nx = 0 +fourier-ny = 0 +fourier-nz = 0 +; EWALD/PME/PPPM parameters +pme-order = 4 +ewald-rtol = 1e-05 +ewald-rtol-lj = 0.001 +lj-pme-comb-rule = Geometric +ewald-geometry = 3d +epsilon-surface = 0 +implicit-solvent = no + +; OPTIONS FOR WEAK COUPLING ALGORITHMS +; Temperature coupling +tcoupl = No +nsttcouple = -1 +nh-chain-length = 10 +print-nose-hoover-chain-variables = no +; Groups to couple separately +tc-grps = +; Time constant (ps) and reference temperature (K) +tau-t = +ref-t = +; pressure coupling +pcoupl = No +pcoupltype = Isotropic +nstpcouple = -1 +; Time constant (ps), compressibility (1/bar) and reference P 
(bar) +tau-p = 1 +compressibility = +ref-p = +; Scaling of reference coordinates, No, All or COM +refcoord-scaling = No + +; OPTIONS FOR QMMM calculations +QMMM = no +; Groups treated Quantum Mechanically +QMMM-grps = +; QM method +QMmethod = +; QMMM scheme +QMMMscheme = normal +; QM basisset +QMbasis = +; QM charge +QMcharge = +; QM multiplicity +QMmult = +; Surface Hopping +SH = +; CAS space options +CASorbitals = +CASelectrons = +SAon = +SAoff = +SAsteps = +; Scale factor for MM charges +MMChargeScaleFactor = 1 + +; SIMULATED ANNEALING +; Type of annealing for each temperature group (no/single/periodic) +annealing = +; Number of time points to use for specifying annealing in each group +annealing-npoints = +; List of times at the annealing points for each group +annealing-time = +; Temp. at each annealing point, for each group. +annealing-temp = + +; GENERATE VELOCITIES FOR STARTUP RUN +gen-vel = no +gen-temp = 300 +gen-seed = -1 + +; OPTIONS FOR BONDS +constraints = none +; Type of constraint algorithm +constraint-algorithm = Lincs +; Do not constrain the start configuration +continuation = no +; Use successive overrelaxation to reduce the number of shake iterations +Shake-SOR = no +; Relative tolerance of shake +shake-tol = 0.0001 +; Highest order in the expansion of the constraint coupling matrix +lincs-order = 4 +; Number of iterations in the final step of LINCS. 1 is fine for +; normal simulations, but use 2 to conserve energy in NVE runs. +; For energy minimization with constraints it should be 4 to 8. 
+lincs-iter = 1 +; Lincs will write a warning to the stderr if in one step a bond +; rotates over more degrees than +lincs-warnangle = 30 +; Convert harmonic bonds to morse potentials +morse = no + +; ENERGY GROUP EXCLUSIONS +; Pairs of energy groups for which all non-bonded interactions are excluded +energygrp-excl = + +; WALLS +; Number of walls, type, atom types, densities and box-z scale factor for Ewald +nwall = 0 +wall-type = 9-3 +wall-r-linpot = -1 +wall-atomtype = +wall-density = +wall-ewald-zfac = 3 + +; COM PULLING +pull = no + +; AWH biasing +awh = no + +; ENFORCED ROTATION +; Enforced rotation: No or Yes +rotation = no + +; Group to display and/or manipulate in interactive MD session +IMD-group = + +; NMR refinement stuff +; Distance restraints type: No, Simple or Ensemble +disre = No +; Force weighting of pairs in one distance restraint: Conservative or Equal +disre-weighting = Conservative +; Use sqrt of the time averaged times the instantaneous violation +disre-mixed = no +disre-fc = 1000 +disre-tau = 0 +; Output frequency for pair distances to energy file +nstdisreout = 100 +; Orientation restraints: No or Yes +orire = no +; Orientation restraints force constant and tau for time averaging +orire-fc = 0 +orire-tau = 0 +orire-fitgrp = +; Output frequency for trace(SD) and S to energy file +nstorireout = 100 + +; Free energy variables +free-energy = no +couple-moltype = +couple-lambda0 = vdw-q +couple-lambda1 = vdw-q +couple-intramol = no +init-lambda = -1 +init-lambda-state = -1 +delta-lambda = 0 +nstdhdl = 50 +fep-lambdas = +mass-lambdas = +coul-lambdas = +vdw-lambdas = +bonded-lambdas = +restraint-lambdas = +temperature-lambdas = +calc-lambda-neighbors = 1 +init-lambda-weights = +dhdl-print-energy = no +sc-alpha = 0 +sc-power = 1 +sc-r-power = 6 +sc-sigma = 0.3 +sc-coul = no +separate-dhdl-file = yes +dhdl-derivatives = yes +dh_hist_size = 0 +dh_hist_spacing = 0.1 + +; Non-equilibrium MD stuff +acc-grps = +accelerate = +freezegrps = +freezedim = 
+cos-acceleration = 0 +deform = + +; simulated tempering variables +simulated-tempering = no +simulated-tempering-scaling = geometric +sim-temp-low = 300 +sim-temp-high = 300 + +; Ion/water position swapping for computational electrophysiology setups +; Swap positions along direction: no, X, Y, Z +swapcoords = no +adress = no + +; User defined thingies +user1-grps = +user2-grps = +userint1 = 0 +userint2 = 0 +userint3 = 0 +userint4 = 0 +userreal1 = 0 +userreal2 = 0 +userreal3 = 0 +userreal4 = 0 +; Electric fields +; Format for electric-field-x, etc. is: four real variables: +; amplitude (V/nm), frequency omega (1/ps), time for the pulse peak (ps), +; and sigma (ps) width of the pulse. Omega = 0 means static field, +; sigma = 0 means no pulse, leaving the field to be a cosine function. +electric-field-x = 0 0 0 0 +electric-field-y = 0 0 0 0 +electric-field-z = 0 0 0 0 + + diff --git a/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_ProducesOutputFromElectricField.xml b/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_ProducesOutputFromElectricField.xml index 353f36905d..9f2bcabde7 100644 --- a/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_ProducesOutputFromElectricField.xml +++ b/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_ProducesOutputFromElectricField.xml @@ -117,31 +117,7 @@ ewald-rtol-lj = 0.001 lj-pme-comb-rule = Geometric ewald-geometry = 3d epsilon-surface = 0 - -; IMPLICIT SOLVENT ALGORITHM -implicit-solvent = No - -; GENERALIZED BORN ELECTROSTATICS -; Algorithm for calculating Born radii -gb-algorithm = Still -; Frequency of calculating the Born radii inside rlist -nstgbradii = 1 -; Cutoff for Born radii calculation; the contribution from atoms -; between rlist and rgbradii is updated every nstlist steps -rgbradii = 1 -; Dielectric coefficient of the implicit solvent -gb-epsilon-solvent = 80 -; Salt concentration in M for Generalized Born models -gb-saltconc = 0 -; Scaling factors used in the OBC GB model. 
Default values are OBC(II) -gb-obc-alpha = 1 -gb-obc-beta = 0.8 -gb-obc-gamma = 4.85 -gb-dielectric-offset = 0.009 -sa-algorithm = Ace-approximation -; Surface tension (kJ/mol/nm^2) for the SA (nonpolar surface) part of GBSA -; The value -1 will set default value for Still/HCT/OBC GB-models. -sa-surface-tension = -1 +implicit-solvent = no ; OPTIONS FOR WEAK COUPLING ALGORITHMS ; Temperature coupling diff --git a/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_ProducesOutputFromElectricFieldOscillating.xml b/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_ProducesOutputFromElectricFieldOscillating.xml index 538ebd3da9..8b52b14d80 100644 --- a/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_ProducesOutputFromElectricFieldOscillating.xml +++ b/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_ProducesOutputFromElectricFieldOscillating.xml @@ -117,31 +117,7 @@ ewald-rtol-lj = 0.001 lj-pme-comb-rule = Geometric ewald-geometry = 3d epsilon-surface = 0 - -; IMPLICIT SOLVENT ALGORITHM -implicit-solvent = No - -; GENERALIZED BORN ELECTROSTATICS -; Algorithm for calculating Born radii -gb-algorithm = Still -; Frequency of calculating the Born radii inside rlist -nstgbradii = 1 -; Cutoff for Born radii calculation; the contribution from atoms -; between rlist and rgbradii is updated every nstlist steps -rgbradii = 1 -; Dielectric coefficient of the implicit solvent -gb-epsilon-solvent = 80 -; Salt concentration in M for Generalized Born models -gb-saltconc = 0 -; Scaling factors used in the OBC GB model. Default values are OBC(II) -gb-obc-alpha = 1 -gb-obc-beta = 0.8 -gb-obc-gamma = 4.85 -gb-dielectric-offset = 0.009 -sa-algorithm = Ace-approximation -; Surface tension (kJ/mol/nm^2) for the SA (nonpolar surface) part of GBSA -; The value -1 will set default value for Still/HCT/OBC GB-models. 
-sa-surface-tension = -1 +implicit-solvent = no ; OPTIONS FOR WEAK COUPLING ALGORITHMS ; Temperature coupling diff --git a/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_ProducesOutputFromElectricFieldPulsed.xml b/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_ProducesOutputFromElectricFieldPulsed.xml index 0de50f50c7..4b2df698fa 100644 --- a/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_ProducesOutputFromElectricFieldPulsed.xml +++ b/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_ProducesOutputFromElectricFieldPulsed.xml @@ -117,31 +117,7 @@ ewald-rtol-lj = 0.001 lj-pme-comb-rule = Geometric ewald-geometry = 3d epsilon-surface = 0 - -; IMPLICIT SOLVENT ALGORITHM -implicit-solvent = No - -; GENERALIZED BORN ELECTROSTATICS -; Algorithm for calculating Born radii -gb-algorithm = Still -; Frequency of calculating the Born radii inside rlist -nstgbradii = 1 -; Cutoff for Born radii calculation; the contribution from atoms -; between rlist and rgbradii is updated every nstlist steps -rgbradii = 1 -; Dielectric coefficient of the implicit solvent -gb-epsilon-solvent = 80 -; Salt concentration in M for Generalized Born models -gb-saltconc = 0 -; Scaling factors used in the OBC GB model. Default values are OBC(II) -gb-obc-alpha = 1 -gb-obc-beta = 0.8 -gb-obc-gamma = 4.85 -gb-dielectric-offset = 0.009 -sa-algorithm = Ace-approximation -; Surface tension (kJ/mol/nm^2) for the SA (nonpolar surface) part of GBSA -; The value -1 will set default value for Still/HCT/OBC GB-models. 
-sa-surface-tension = -1 +implicit-solvent = no ; OPTIONS FOR WEAK COUPLING ALGORITHMS ; Temperature coupling diff --git a/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_UserErrorsSilentlyTolerated.xml b/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_UserErrorsSilentlyTolerated.xml index 8d2b356f90..c0edc1f5e2 100644 --- a/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_UserErrorsSilentlyTolerated.xml +++ b/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_UserErrorsSilentlyTolerated.xml @@ -117,31 +117,7 @@ ewald-rtol-lj = 0.001 lj-pme-comb-rule = Geometric ewald-geometry = 3d epsilon-surface = 0 - -; IMPLICIT SOLVENT ALGORITHM -implicit-solvent = No - -; GENERALIZED BORN ELECTROSTATICS -; Algorithm for calculating Born radii -gb-algorithm = Still -; Frequency of calculating the Born radii inside rlist -nstgbradii = 1 -; Cutoff for Born radii calculation; the contribution from atoms -; between rlist and rgbradii is updated every nstlist steps -rgbradii = 1 -; Dielectric coefficient of the implicit solvent -gb-epsilon-solvent = 80 -; Salt concentration in M for Generalized Born models -gb-saltconc = 0 -; Scaling factors used in the OBC GB model. Default values are OBC(II) -gb-obc-alpha = 1 -gb-obc-beta = 0.8 -gb-obc-gamma = 4.85 -gb-dielectric-offset = 0.009 -sa-algorithm = Ace-approximation -; Surface tension (kJ/mol/nm^2) for the SA (nonpolar surface) part of GBSA -; The value -1 will set default value for Still/HCT/OBC GB-models. -sa-surface-tension = -1 +implicit-solvent = no ; OPTIONS FOR WEAK COUPLING ALGORITHMS ; Temperature coupling diff --git a/src/gromacs/gmxpreprocess/topdirs.cpp b/src/gromacs/gmxpreprocess/topdirs.cpp index b955ba2d91..7844f89353 100644 --- a/src/gromacs/gmxpreprocess/topdirs.cpp +++ b/src/gromacs/gmxpreprocess/topdirs.cpp @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. 
- * Copyright (c) 2013,2014,2015,2017, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -366,8 +366,6 @@ void DS_Init(DirStack **DS) set_nec(&(necessary[d_angletypes]), d_atomtypes, d_none); set_nec(&(necessary[d_dihedraltypes]), d_atomtypes, d_none); set_nec(&(necessary[d_nonbond_params]), d_atomtypes, d_none); - set_nec(&(necessary[d_implicit_genborn_params]), d_atomtypes, d_none); - set_nec(&(necessary[d_implicit_surface_params]), d_atomtypes, d_none); set_nec(&(necessary[d_cmaptypes]), d_atomtypes, d_none); set_nec(&(necessary[d_moleculetype]), d_atomtypes, d_none); set_nec(&(necessary[d_atoms]), d_moleculetype, d_none); diff --git a/src/gromacs/gmxpreprocess/topio.cpp b/src/gromacs/gmxpreprocess/topio.cpp index 905e27da4e..73e5abf422 100644 --- a/src/gromacs/gmxpreprocess/topio.cpp +++ b/src/gromacs/gmxpreprocess/topio.cpp @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -65,7 +65,6 @@ #include "gromacs/gmxpreprocess/vsite_parm.h" #include "gromacs/math/units.h" #include "gromacs/math/utilities.h" -#include "gromacs/mdlib/genborn.h" #include "gromacs/mdtypes/inputrec.h" #include "gromacs/mdtypes/md_enums.h" #include "gromacs/pbcutil/pbc.h" @@ -396,182 +395,6 @@ static char ** cpp_opts(const char *define, const char *include, } -static int -find_gb_bondlength(t_params *plist, int ai, int aj, real *length) -{ - int i, j, a1, a2; - - int found = 0; - int status; - - for (i = 0; i < F_NRE && !found; i++) - { - if (IS_CHEMBOND(i)) - { - for (j = 0; j < plist[i].nr; j++) - { - a1 = plist[i].param[j].a[0]; - a2 = plist[i].param[j].a[1]; - - if ( (a1 == ai && a2 == aj) || (a1 == aj && a2 == ai)) - { - /* Equilibrium bond distance */ - *length = plist[i].param[j].c[0]; - found = 1; - } - } - } - } - status = !found; - - return status; -} - - -static int -find_gb_anglelength(t_params *plist, int ai, int ak, real *length) -{ - int i, j, a1, a2, a3; - real r12, r23, a123; - int found = 0; - int status, status1, status2; - - r12 = r23 = 0; - - for (i = 0; i < F_NRE && !found; i++) - { - if (IS_ANGLE(i)) - { - for (j = 0; j < plist[i].nr; j++) - { - a1 = plist[i].param[j].a[0]; - a2 = plist[i].param[j].a[1]; - a3 = plist[i].param[j].a[2]; - - /* We dont care what the middle atom is, but use it below */ - if ( (a1 == ai && a3 == ak) || (a1 == ak && a3 == ai) ) - { - /* Equilibrium bond distance */ - a123 = plist[i].param[j].c[0]; - /* Use middle atom to find reference distances r12 and r23 */ - status1 = find_gb_bondlength(plist, a1, a2, &r12); - status2 = find_gb_bondlength(plist, a2, a3, &r23); - - if (status1 == 0 && status2 == 0) - { - /* cosine theorem to get r13 */ - *length = std::sqrt(r12*r12+r23*r23-(2*r12*r23*cos(a123/RAD2DEG))); - found = 1; - } - } - } - } - } - status = !found; - - return status; -} - -static int -generate_gb_exclusion_interactions(t_molinfo *mi, gpp_atomtype_t atype, t_nextnb *nnb) -{ - int j, n, ai, aj, ti, tj; 
- int ftype; - t_param param; - t_params * plist; - t_atoms * at; - real radiusi, radiusj; - real gb_radiusi, gb_radiusj; - real param_c2, param_c4; - real distance; - - plist = mi->plist; - at = &mi->atoms; - - for (n = 1; n <= nnb->nrex; n++) - { - switch (n) - { - case 1: - ftype = F_GB12; - param_c2 = STILL_P2; - param_c4 = 0.8875; - break; - case 2: - ftype = F_GB13; - param_c2 = STILL_P3; - param_c4 = 0.3516; - break; - default: - /* Put all higher-order exclusions into 1,4 list so we dont miss them */ - ftype = F_GB14; - param_c2 = STILL_P3; - param_c4 = 0.3516; - break; - } - - for (ai = 0; ai < nnb->nr; ai++) - { - ti = at->atom[ai].type; - radiusi = get_atomtype_radius(ti, atype); - gb_radiusi = get_atomtype_gb_radius(ti, atype); - - for (j = 0; j < nnb->nrexcl[ai][n]; j++) - { - aj = nnb->a[ai][n][j]; - - /* Only add the interactions once */ - if (aj > ai) - { - tj = at->atom[aj].type; - radiusj = get_atomtype_radius(tj, atype); - gb_radiusj = get_atomtype_gb_radius(tj, atype); - - /* There is an exclusion of type "ftype" between atoms ai and aj */ - param.a[0] = ai; - param.a[1] = aj; - - /* Reference distance, not used for 1-4 interactions */ - switch (ftype) - { - case F_GB12: - if (find_gb_bondlength(plist, ai, aj, &distance) != 0) - { - gmx_fatal(FARGS, "Cannot find bond length for atoms %d-%d", ai, aj); - } - break; - case F_GB13: - if (find_gb_anglelength(plist, ai, aj, &distance) != 0) - { - gmx_fatal(FARGS, "Cannot find length for atoms %d-%d involved in angle", ai, aj); - } - break; - default: - distance = -1; - break; - } - /* Assign GB parameters */ - /* Sum of radii */ - param.c[0] = radiusi+radiusj; - /* Reference distance distance */ - param.c[1] = distance; - /* Still parameter */ - param.c[2] = param_c2; - /* GB radius */ - param.c[3] = gb_radiusi+gb_radiusj; - /* Parameter */ - param.c[4] = param_c4; - - /* Add it to the parameter list */ - add_param_to_list(&plist[ftype], ¶m); - } - } - } - } - return 0; -} - - static void 
make_atoms_sys(int nmolb, const gmx_molblock_t *molb, const t_molinfo *molinfo, t_atoms *atoms) @@ -615,7 +438,6 @@ static char **read_topol(const char *infile, const char *outfile, int *nmolblock, gmx_molblock_t **molblock, gmx_bool bFEP, - gmx_bool bGenborn, gmx_bool bZero, gmx_bool usingFullRangeElectrostatics, warninp_t wi) @@ -917,11 +739,15 @@ static char **read_topol(const char *infile, const char *outfile, */ case d_implicit_genborn_params: - push_gb_params(atype, pline, wi); + // Skip this line, so old topologies with + // GB parameters can be read. break; case d_implicit_surface_params: - gmx_fatal(FARGS, "Implicit surface directive not supported yet."); + // Skip this line, so that any topologies + // with surface parameters can be read + // (even though these were never formally + // supported). break; case d_cmaptypes: @@ -1068,14 +894,6 @@ static char **read_topol(const char *infile, const char *outfile, - /* nnb contains information about first,2nd,3rd bonded neighbors. - * Use this to generate GB 1-2,1-3,1-4 interactions when necessary. 
- */ - if (bGenborn == TRUE) - { - generate_gb_exclusion_interactions(mi0, atype, &nnb); - } - done_nnb(&nnb); if (bCouple) @@ -1184,7 +1002,6 @@ char **do_top(gmx_bool bVerbose, const t_inputrec *ir, int *nmolblock, gmx_molblock_t **molblock, - gmx_bool bGenborn, warninp_t wi) { /* Tmpfile might contain a long path */ @@ -1209,7 +1026,7 @@ char **do_top(gmx_bool bVerbose, nrmols, molinfo, intermolecular_interactions, plist, combination_rule, repulsion_power, opts, fudgeQQ, nmolblock, molblock, - ir->efep != efepNO, bGenborn, bZero, + ir->efep != efepNO, bZero, EEL_FULL(ir->coulombtype), wi); if ((*combination_rule != eCOMB_GEOMETRIC) && diff --git a/src/gromacs/gmxpreprocess/topio.h b/src/gromacs/gmxpreprocess/topio.h index 4023f8fceb..bb79551e62 100644 --- a/src/gromacs/gmxpreprocess/topio.h +++ b/src/gromacs/gmxpreprocess/topio.h @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2012,2014,2015,2016, by the GROMACS development team, led by + * Copyright (c) 2012,2014,2015,2016,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -68,7 +68,6 @@ char **do_top(gmx_bool bVerbose, const t_inputrec *ir, int *nmolblock, gmx_molblock_t **molblock, - gmx_bool bGB, warninp_t wi); /* This routine expects sys->molt[m].ilist to be of size F_NRE and ordered. */ diff --git a/src/gromacs/gmxpreprocess/toppush.cpp b/src/gromacs/gmxpreprocess/toppush.cpp index 24381dcd06..c49ad80a76 100644 --- a/src/gromacs/gmxpreprocess/toppush.cpp +++ b/src/gromacs/gmxpreprocess/toppush.cpp @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. 
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -238,7 +238,6 @@ void push_at (t_symtab *symtab, gpp_atomtype_t at, t_bond_atomtype bat, char type[STRLEN], btype[STRLEN], ptype[STRLEN]; double m, q; double c[MAXFORCEPARAM]; - double radius, vol, surftens, gb_radius, S_hct; char tmpfield[12][100]; /* Max 12 fields of width 100 */ char errbuf[STRLEN]; t_atom *atom; @@ -308,12 +307,7 @@ void push_at (t_symtab *symtab, gpp_atomtype_t at, t_bond_atomtype bat, } /* optional fields */ - surftens = -1; - vol = -1; - radius = -1; - gb_radius = -1; atomnr = -1; - S_hct = -1; switch (nb_funct) { @@ -325,9 +319,8 @@ void push_at (t_symtab *symtab, gpp_atomtype_t at, t_bond_atomtype bat, { if (have_bonded_type) { - nread = sscanf(line, "%s%s%d%lf%lf%s%lf%lf%lf%lf%lf%lf", - type, btype, &atomnr, &m, &q, ptype, &c[0], &c[1], - &radius, &vol, &surftens, &gb_radius); + nread = sscanf(line, "%s%s%d%lf%lf%s%lf%lf", + type, btype, &atomnr, &m, &q, ptype, &c[0], &c[1]); if (nread < 8) { too_few(wi); @@ -337,9 +330,8 @@ void push_at (t_symtab *symtab, gpp_atomtype_t at, t_bond_atomtype bat, else { /* have_atomic_number && !have_bonded_type */ - nread = sscanf(line, "%s%d%lf%lf%s%lf%lf%lf%lf%lf%lf", - type, &atomnr, &m, &q, ptype, &c[0], &c[1], - &radius, &vol, &surftens, &gb_radius); + nread = sscanf(line, "%s%d%lf%lf%s%lf%lf", + type, &atomnr, &m, &q, ptype, &c[0], &c[1]); if (nread < 7) { too_few(wi); @@ -352,9 +344,8 @@ void push_at (t_symtab *symtab, gpp_atomtype_t at, t_bond_atomtype bat, if (have_bonded_type) { /* !have_atomic_number && have_bonded_type */ - nread = sscanf(line, "%s%s%lf%lf%s%lf%lf%lf%lf%lf%lf", - type, btype, &m, &q, ptype, &c[0], &c[1], - &radius, 
&vol, &surftens, &gb_radius); + nread = sscanf(line, "%s%s%lf%lf%s%lf%lf", + type, btype, &m, &q, ptype, &c[0], &c[1]); if (nread < 7) { too_few(wi); @@ -364,9 +355,8 @@ void push_at (t_symtab *symtab, gpp_atomtype_t at, t_bond_atomtype bat, else { /* !have_atomic_number && !have_bonded_type */ - nread = sscanf(line, "%s%lf%lf%s%lf%lf%lf%lf%lf%lf", - type, &m, &q, ptype, &c[0], &c[1], - &radius, &vol, &surftens, &gb_radius); + nread = sscanf(line, "%s%lf%lf%s%lf%lf", + type, &m, &q, ptype, &c[0], &c[1]); if (nread < 6) { too_few(wi); @@ -394,9 +384,8 @@ void push_at (t_symtab *symtab, gpp_atomtype_t at, t_bond_atomtype bat, { if (have_bonded_type) { - nread = sscanf(line, "%s%s%d%lf%lf%s%lf%lf%lf%lf%lf%lf%lf", - type, btype, &atomnr, &m, &q, ptype, &c[0], &c[1], &c[2], - &radius, &vol, &surftens, &gb_radius); + nread = sscanf(line, "%s%s%d%lf%lf%s%lf%lf%lf", + type, btype, &atomnr, &m, &q, ptype, &c[0], &c[1], &c[2]); if (nread < 9) { too_few(wi); @@ -406,9 +395,8 @@ void push_at (t_symtab *symtab, gpp_atomtype_t at, t_bond_atomtype bat, else { /* have_atomic_number && !have_bonded_type */ - nread = sscanf(line, "%s%d%lf%lf%s%lf%lf%lf%lf%lf%lf%lf", - type, &atomnr, &m, &q, ptype, &c[0], &c[1], &c[2], - &radius, &vol, &surftens, &gb_radius); + nread = sscanf(line, "%s%d%lf%lf%s%lf%lf%lf", + type, &atomnr, &m, &q, ptype, &c[0], &c[1], &c[2]); if (nread < 8) { too_few(wi); @@ -421,9 +409,8 @@ void push_at (t_symtab *symtab, gpp_atomtype_t at, t_bond_atomtype bat, if (have_bonded_type) { /* !have_atomic_number && have_bonded_type */ - nread = sscanf(line, "%s%s%lf%lf%s%lf%lf%lf%lf%lf%lf%lf", - type, btype, &m, &q, ptype, &c[0], &c[1], &c[2], - &radius, &vol, &surftens, &gb_radius); + nread = sscanf(line, "%s%s%lf%lf%s%lf%lf%lf", + type, btype, &m, &q, ptype, &c[0], &c[1], &c[2]); if (nread < 8) { too_few(wi); @@ -433,9 +420,8 @@ void push_at (t_symtab *symtab, gpp_atomtype_t at, t_bond_atomtype bat, else { /* !have_atomic_number && !have_bonded_type */ - nread = 
sscanf(line, "%s%lf%lf%s%lf%lf%lf%lf%lf%lf%lf", - type, &m, &q, ptype, &c[0], &c[1], &c[2], - &radius, &vol, &surftens, &gb_radius); + nread = sscanf(line, "%s%lf%lf%s%lf%lf%lf", + type, &m, &q, ptype, &c[0], &c[1], &c[2]); if (nread < 7) { too_few(wi); @@ -517,15 +503,14 @@ void push_at (t_symtab *symtab, gpp_atomtype_t at, t_bond_atomtype bat, sprintf(errbuf, "Overriding atomtype %s", type); warning(wi, errbuf); if ((nr = set_atomtype(nr, at, symtab, atom, type, param, batype_nr, - radius, vol, surftens, atomnr, gb_radius, S_hct)) == NOTSET) + atomnr)) == NOTSET) { sprintf(errbuf, "Replacing atomtype %s failed", type); warning_error_and_exit(wi, errbuf, FARGS); } } else if ((add_atomtype(at, symtab, atom, type, param, - batype_nr, radius, vol, - surftens, atomnr, gb_radius, S_hct)) == NOTSET) + batype_nr, atomnr)) == NOTSET) { sprintf(errbuf, "Adding atomtype %s failed", type); warning_error_and_exit(wi, errbuf, FARGS); @@ -1082,33 +1067,6 @@ void push_nbt(directive d, t_nbparam **nbt, gpp_atomtype_t atype, } } -void -push_gb_params (gpp_atomtype_t at, char *line, - warninp_t wi) -{ - int atype; - double radius, vol, surftens, gb_radius, S_hct; - char atypename[STRLEN]; - char errbuf[STRLEN]; - - if ( (sscanf(line, "%s%lf%lf%lf%lf%lf", atypename, &radius, &vol, &surftens, &gb_radius, &S_hct)) != 6) - { - sprintf(errbuf, "Too few gb parameters for type %s\n", atypename); - warning(wi, errbuf); - } - - /* Search for atomtype */ - atype = get_atomtype_type(atypename, at); - - if (atype == NOTSET) - { - printf("Couldn't find topology match for atomtype %s\n", atypename); - abort(); - } - - set_atomtype_gbparam(at, atype, radius, vol, surftens, gb_radius, S_hct); -} - void push_cmaptype(directive d, t_params bt[], int nral, gpp_atomtype_t at, t_bond_atomtype bat, char *line, @@ -2633,7 +2591,7 @@ int add_atomtype_decoupled(t_symtab *symtab, gpp_atomtype_t at, param.c[i] = 0.0; } - nr = add_atomtype(at, symtab, &atom, "decoupled", ¶m, -1, 0.0, 0.0, 0.0, 0, 0, 0); + nr 
= add_atomtype(at, symtab, &atom, "decoupled", ¶m, -1, 0); /* Add space in the non-bonded parameters matrix */ realloc_nb_params(at, nbparam, pair); diff --git a/src/gromacs/gmxpreprocess/toppush.h b/src/gromacs/gmxpreprocess/toppush.h index 24619f2398..3c6e2bc57f 100644 --- a/src/gromacs/gmxpreprocess/toppush.h +++ b/src/gromacs/gmxpreprocess/toppush.h @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2016,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -76,11 +76,6 @@ void push_nbt(directive d, t_nbparam **nbt, gpp_atomtype_t atype, char *plines, int nb_funct, warninp_t wi); -void -push_gb_params(gpp_atomtype_t atype, - char *line, - warninp_t wi); - void push_atom(struct t_symtab *symtab, t_block *cgs, t_atoms *at, diff --git a/src/gromacs/gmxpreprocess/toputil.cpp b/src/gromacs/gmxpreprocess/toputil.cpp index 6e84f4a3af..01766cfca8 100644 --- a/src/gromacs/gmxpreprocess/toputil.cpp +++ b/src/gromacs/gmxpreprocess/toputil.cpp @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2012,2014,2015,2017, by the GROMACS development team, led by + * Copyright (c) 2012,2014,2015,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -532,7 +532,7 @@ void print_bondeds(FILE *out, int natoms, directive d, { char buf[12]; sprintf(buf, "%4d", (i+1)); - add_atomtype(atype, &stab, a, buf, param, 0, 0, 0, 0, 0, 0, 0); + add_atomtype(atype, &stab, a, buf, param, 0, 0); } print_bt(out, d, atype, ftype, fsubtype, plist, TRUE); diff --git a/src/gromacs/listed-forces/listed-forces.cpp b/src/gromacs/listed-forces/listed-forces.cpp index 0de7f7b640..f7aedaa034 100644 --- a/src/gromacs/listed-forces/listed-forces.cpp +++ b/src/gromacs/listed-forces/listed-forces.cpp @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2014,2015,2016,2017, by the GROMACS development team, led by + * Copyright (c) 2014,2015,2016,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -405,8 +405,7 @@ ftype_is_bonded_potential(int ftype) { return (interaction_function[ftype].flags & IF_BOND) && - !(ftype == F_CONNBONDS || ftype == F_POSRES || ftype == F_FBPOSRES) && - (ftype < F_GB12 || ftype > F_GB14); + !(ftype == F_CONNBONDS || ftype == F_POSRES || ftype == F_FBPOSRES); } /*! \brief Compute the bonded part of the listed forces, parallelized over threads diff --git a/src/gromacs/mdlib/broadcaststructs.cpp b/src/gromacs/mdlib/broadcaststructs.cpp index 4977cf3acf..6a570aacc5 100644 --- a/src/gromacs/mdlib/broadcaststructs.cpp +++ b/src/gromacs/mdlib/broadcaststructs.cpp @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. 
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -769,23 +769,7 @@ static void bc_molblock(const t_commrec *cr, gmx_molblock_t *molb) static void bc_atomtypes(const t_commrec *cr, t_atomtypes *atomtypes) { - int nr; - block_bc(cr, atomtypes->nr); - - nr = atomtypes->nr; - - snew_bc(cr, atomtypes->radius, nr); - snew_bc(cr, atomtypes->vol, nr); - snew_bc(cr, atomtypes->surftens, nr); - snew_bc(cr, atomtypes->gb_radius, nr); - snew_bc(cr, atomtypes->S_hct, nr); - - nblock_bc(cr, nr, atomtypes->radius); - nblock_bc(cr, nr, atomtypes->vol); - nblock_bc(cr, nr, atomtypes->surftens); - nblock_bc(cr, nr, atomtypes->gb_radius); - nblock_bc(cr, nr, atomtypes->S_hct); } /*! \brief Broadcasts ir and mtop from the master to all nodes in diff --git a/src/gromacs/mdlib/force.cpp b/src/gromacs/mdlib/force.cpp index f01ea4d333..4f21ca3cca 100644 --- a/src/gromacs/mdlib/force.cpp +++ b/src/gromacs/mdlib/force.cpp @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -57,7 +57,6 @@ #include "gromacs/math/vec.h" #include "gromacs/math/vecdump.h" #include "gromacs/mdlib/forcerec-threading.h" -#include "gromacs/mdlib/genborn.h" #include "gromacs/mdlib/mdrun.h" #include "gromacs/mdlib/ns.h" #include "gromacs/mdlib/qmmm.h" @@ -144,9 +143,6 @@ void do_force_lowlevel(t_forcerec *fr, t_inputrec *ir, gmx::ForceWithVirial *forceWithVirial, gmx_enerdata_t *enerd, t_fcdata *fcd, - gmx_localtop_t *top, - gmx_genborn_t *born, - gmx_bool bBornRadii, matrix box, t_lambda *fepvals, real *lambda, @@ -201,24 +197,6 @@ void do_force_lowlevel(t_forcerec *fr, t_inputrec *ir, enerd->dvdl_lin[efptVDW] += dvdl_walls; } - /* If doing GB, reset dvda and calculate the Born radii */ - if (ir->implicit_solvent) - { - wallcycle_sub_start(wcycle, ewcsNONBONDED); - - for (i = 0; i < born->nr; i++) - { - fr->dvda[i] = 0; - } - - if (bBornRadii) - { - calc_gb_rad(cr, fr, ir, top, x, fr->gblist, born, md, nrnb); - } - - wallcycle_sub_stop(wcycle, ewcsNONBONDED); - } - where(); /* We only do non-bonded calculation with group scheme here, the verlet * calls are done from do_force_cutsVERLET(). */ @@ -273,17 +251,6 @@ void do_force_lowlevel(t_forcerec *fr, t_inputrec *ir, where(); } - /* If we are doing GB, calculate bonded forces and apply corrections - * to the solvation forces */ - /* MRS: Eventually, many need to include free energy contribution here! 
*/ - if (ir->implicit_solvent) - { - wallcycle_sub_start(wcycle, ewcsLISTED); - calc_gb_forces(cr, md, born, top, x, forceForUseWithShiftForces, fr, idef, - ir->gb_algorithm, ir->sa_algorithm, nrnb, &pbc, graph, enerd); - wallcycle_sub_stop(wcycle, ewcsLISTED); - } - #if GMX_MPI if (TAKETIME) { @@ -703,8 +670,6 @@ void sum_epot(gmx_grppairener_t *grpp, real *epot) epot[F_LJ] = sum_v(grpp->nener, grpp->ener[egLJSR]); epot[F_LJ14] = sum_v(grpp->nener, grpp->ener[egLJ14]); epot[F_COUL14] = sum_v(grpp->nener, grpp->ener[egCOUL14]); - /* We have already added 1-2,1-3, and 1-4 terms to F_GBPOL */ - epot[F_GBPOL] += sum_v(grpp->nener, grpp->ener[egGB]); /* lattice part of LR doesnt belong to any group * and has been added earlier diff --git a/src/gromacs/mdlib/force.h b/src/gromacs/mdlib/force.h index 2d83e17a25..6bfc66051c 100644 --- a/src/gromacs/mdlib/force.h +++ b/src/gromacs/mdlib/force.h @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -164,7 +164,6 @@ void do_force(FILE *log, t_commrec *cr, t_forcerec *fr, gmx_vsite_t *vsite, rvec mu_tot, double t, struct gmx_edsam *ed, - gmx_bool bBornRadii, int flags, DdOpenBalanceRegionBeforeForceComputation ddOpenBalanceRegion, DdCloseBalanceRegionAfterForceComputation ddCloseBalanceRegion); @@ -202,9 +201,6 @@ void do_force_lowlevel(t_forcerec *fr, gmx::ForceWithVirial *forceWithVirial, gmx_enerdata_t *enerd, t_fcdata *fcd, - gmx_localtop_t *top, - gmx_genborn_t *born, - gmx_bool bBornRadii, matrix box, t_lambda *fepvals, real *lambda, diff --git a/src/gromacs/mdlib/forcerec.cpp b/src/gromacs/mdlib/forcerec.cpp index 41f87dedc8..3b77599e06 100644 --- a/src/gromacs/mdlib/forcerec.cpp +++ b/src/gromacs/mdlib/forcerec.cpp @@ -1539,10 +1539,6 @@ gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec * ir->vdwtype == evdwCUT && ir->coulombtype == eelCUT && ir->efep == efepNO && - (ir->implicit_solvent == eisNO || - (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL || - ir->gb_algorithm == egbHCT || - ir->gb_algorithm == egbOBC))) && getenv("GMX_NO_ALLVSALL") == nullptr ); @@ -2346,7 +2342,7 @@ void init_forcerec(FILE *fp, gmx_bool bNoSolvOpt, real print_force) { - int i, m, negp_pp, negptable, egi, egj; + int m, negp_pp, negptable, egi, egj; real rtab; char *env; double dbl; @@ -2491,7 +2487,6 @@ void init_forcerec(FILE *fp, /* Check if we can/should do all-vs-all kernels */ fr->bAllvsAll = can_use_allvsall(ir, FALSE, nullptr, nullptr); fr->AllvsAll_work = nullptr; - fr->AllvsAll_workgb = nullptr; /* All-vs-all kernels have not been implemented in 4.6 and later. * See Redmine #1249. 
*/ @@ -2592,7 +2587,6 @@ void init_forcerec(FILE *fp, fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC); } } - fr->bGB = (ir->implicit_solvent == eisGBSA); fr->rc_scaling = ir->refcoord_scaling; copy_rvec(ir->posres_com, fr->posres_com); @@ -2619,7 +2613,7 @@ void init_forcerec(FILE *fp, switch (ic->eeltype) { case eelCUT: - fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB; + fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_COULOMB; break; case eelRF: @@ -2875,63 +2869,9 @@ void init_forcerec(FILE *fp, set_avcsixtwelve(fp, fr, mtop); } - fr->gb_epsilon_solvent = ir->gb_epsilon_solvent; - - /* Copy the GBSA data (radius, volume and surftens for each - * atomtype) from the topology atomtype section to forcerec. - */ - snew(fr->atype_radius, fr->ntype); - snew(fr->atype_vol, fr->ntype); - snew(fr->atype_surftens, fr->ntype); - snew(fr->atype_gb_radius, fr->ntype); - snew(fr->atype_S_hct, fr->ntype); - - if (mtop->atomtypes.nr > 0) - { - for (i = 0; i < fr->ntype; i++) - { - fr->atype_radius[i] = mtop->atomtypes.radius[i]; - } - for (i = 0; i < fr->ntype; i++) - { - fr->atype_vol[i] = mtop->atomtypes.vol[i]; - } - for (i = 0; i < fr->ntype; i++) - { - fr->atype_surftens[i] = mtop->atomtypes.surftens[i]; - } - for (i = 0; i < fr->ntype; i++) - { - fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i]; - } - for (i = 0; i < fr->ntype; i++) - { - fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i]; - } - } - - /* Generate the GB table if needed */ - if (fr->bGB) + if (ir->implicit_solvent) { - GMX_LOG(mdlog.info).asParagraph(). 
- appendText("The support for implicit solvent is deprecated, and may be removed " - "in a future version."); -#if GMX_DOUBLE - fr->gbtabscale = 2000; -#else - fr->gbtabscale = 500; -#endif - - fr->gbtabr = 100; - fr->gbtab = make_gb_table(fr); - - init_gb(&fr->born, fr, ir, mtop, ir->gb_algorithm); - - /* Copy local gb data (for dd, this is done in dd_partition_system) */ - if (!DOMAINDECOMP(cr)) - { - make_local_gb(cr, fr->born, ir->gb_algorithm); - } + gmx_fatal(FARGS, "Implicit solvation is no longer supported."); } /* Construct tables for the group scheme. A little unnecessary to diff --git a/src/gromacs/mdlib/forcerec.h b/src/gromacs/mdlib/forcerec.h index 85a4383353..af7e5d77e8 100644 --- a/src/gromacs/mdlib/forcerec.h +++ b/src/gromacs/mdlib/forcerec.h @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -38,7 +38,6 @@ #define GMX_MDLIB_FORCEREC_H #include "gromacs/mdlib/force_flags.h" -#include "gromacs/mdlib/genborn.h" #include "gromacs/mdlib/tgroup.h" #include "gromacs/mdlib/vsite.h" #include "gromacs/mdtypes/forcerec.h" @@ -49,6 +48,7 @@ struct gmx_hw_info_t; struct t_commrec; struct t_fcdata; struct t_filenm; +struct t_inputrec; namespace gmx { diff --git a/src/gromacs/mdlib/genborn.cpp b/src/gromacs/mdlib/genborn.cpp deleted file mode 100644 index 7d9cc0f617..0000000000 --- a/src/gromacs/mdlib/genborn.cpp +++ /dev/null @@ -1,1713 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 1991-2000, University of Groningen, The Netherlands. 
- * Copyright (c) 2001-2008, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2017, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. 
- */ - -#include "gmxpre.h" - -#include "genborn.h" - -#include - -#include - -#include - -#include "gromacs/domdec/domdec.h" -#include "gromacs/domdec/domdec_struct.h" -#include "gromacs/fileio/pdbio.h" -#include "gromacs/gmxlib/network.h" -#include "gromacs/gmxlib/nrnb.h" -#include "gromacs/math/functions.h" -#include "gromacs/math/units.h" -#include "gromacs/math/vec.h" -#include "gromacs/mdlib/genborn_allvsall.h" -#include "gromacs/mdtypes/commrec.h" -#include "gromacs/mdtypes/inputrec.h" -#include "gromacs/mdtypes/md_enums.h" -#include "gromacs/mdtypes/nblist.h" -#include "gromacs/pbcutil/ishift.h" -#include "gromacs/pbcutil/mshift.h" -#include "gromacs/pbcutil/pbc.h" -#include "gromacs/topology/mtop_util.h" -#include "gromacs/utility/fatalerror.h" -#include "gromacs/utility/gmxmpi.h" -#include "gromacs/utility/smalloc.h" - - -typedef struct { - int shift; - int naj; - int *aj; - int aj_nalloc; -} gbtmpnbl_t; - -typedef struct gbtmpnbls { - int nlist; - gbtmpnbl_t *list; - int list_nalloc; -} t_gbtmpnbls; - -/* This function is exactly the same as the one in listed-forces/bonded.cpp. 
The reason - * it is copied here is that the bonded gb-interactions are evaluated - * not in calc_bonds, but rather in calc_gb_forces - */ -static int pbc_rvec_sub(const t_pbc *pbc, const rvec xi, const rvec xj, rvec dx) -{ - if (pbc) - { - return pbc_dx_aiuc(pbc, xi, xj, dx); - } - else - { - rvec_sub(xi, xj, dx); - return CENTRAL; - } -} - -static int init_gb_nblist(int natoms, t_nblist *nl) -{ - nl->maxnri = natoms*4; - nl->maxnrj = 0; - nl->nri = 0; - nl->nrj = 0; - nl->iinr = nullptr; - nl->gid = nullptr; - nl->shift = nullptr; - nl->jindex = nullptr; - nl->jjnr = nullptr; - /*nl->nltype = nltype;*/ - - srenew(nl->iinr, nl->maxnri); - srenew(nl->gid, nl->maxnri); - srenew(nl->shift, nl->maxnri); - srenew(nl->jindex, nl->maxnri+1); - - nl->jindex[0] = 0; - - return 0; -} - - -static int init_gb_still(const t_atomtypes *atype, t_idef *idef, t_atoms *atoms, - gmx_genborn_t *born, int natoms) -{ - - int i, j, m, ia, ib; - real r, ri, rj, ri2, rj2, r3, r4, ratio, term, h, doffset; - - real *vsol; - real *gp; - - snew(vsol, natoms); - snew(gp, natoms); - snew(born->gpol_still_work, natoms+3); - - doffset = born->gb_doffset; - - for (i = 0; i < natoms; i++) - { - born->gpol_globalindex[i] = born->vsolv_globalindex[i] = - born->gb_radius_globalindex[i] = 0; - } - - /* Compute atomic solvation volumes for Still method */ - for (i = 0; i < natoms; i++) - { - ri = atype->gb_radius[atoms->atom[i].type]; - born->gb_radius_globalindex[i] = ri; - r3 = ri*ri*ri; - born->vsolv_globalindex[i] = (4*M_PI/3)*r3; - } - - for (j = 0; j < idef->il[F_GB12].nr; j += 3) - { - m = idef->il[F_GB12].iatoms[j]; - ia = idef->il[F_GB12].iatoms[j+1]; - ib = idef->il[F_GB12].iatoms[j+2]; - - r = 1.01*idef->iparams[m].gb.st; - - ri = atype->gb_radius[atoms->atom[ia].type]; - rj = atype->gb_radius[atoms->atom[ib].type]; - - ri2 = ri*ri; - rj2 = rj*rj; - - ratio = (rj2-ri2-r*r)/(2*ri*r); - h = ri*(1+ratio); - term = (M_PI/3.0)*h*h*(3.0*ri-h); - - born->vsolv_globalindex[ia] -= term; - - ratio = 
(ri2-rj2-r*r)/(2*rj*r); - h = rj*(1+ratio); - term = (M_PI/3.0)*h*h*(3.0*rj-h); - - born->vsolv_globalindex[ib] -= term; - } - - /* Get the self-, 1-2 and 1-3 polarization energies for analytical Still - method */ - /* Self */ - for (j = 0; j < natoms; j++) - { - if (born->use_globalindex[j] == 1) - { - born->gpol_globalindex[j] = -0.5*ONE_4PI_EPS0/ - (atype->gb_radius[atoms->atom[j].type]-doffset+STILL_P1); - } - } - - /* 1-2 */ - for (j = 0; j < idef->il[F_GB12].nr; j += 3) - { - m = idef->il[F_GB12].iatoms[j]; - ia = idef->il[F_GB12].iatoms[j+1]; - ib = idef->il[F_GB12].iatoms[j+2]; - - r = idef->iparams[m].gb.st; - - r4 = r*r*r*r; - - born->gpol_globalindex[ia] = born->gpol_globalindex[ia]+ - STILL_P2*born->vsolv_globalindex[ib]/r4; - born->gpol_globalindex[ib] = born->gpol_globalindex[ib]+ - STILL_P2*born->vsolv_globalindex[ia]/r4; - } - - /* 1-3 */ - for (j = 0; j < idef->il[F_GB13].nr; j += 3) - { - m = idef->il[F_GB13].iatoms[j]; - ia = idef->il[F_GB13].iatoms[j+1]; - ib = idef->il[F_GB13].iatoms[j+2]; - - r = idef->iparams[m].gb.st; - r4 = r*r*r*r; - - born->gpol_globalindex[ia] = born->gpol_globalindex[ia]+ - STILL_P3*born->vsolv_globalindex[ib]/r4; - born->gpol_globalindex[ib] = born->gpol_globalindex[ib]+ - STILL_P3*born->vsolv_globalindex[ia]/r4; - } - - sfree(vsol); - sfree(gp); - - return 0; -} - -/* Initialize all GB datastructs and compute polarization energies */ -int init_gb(gmx_genborn_t **p_born, - t_forcerec *fr, const t_inputrec *ir, - const gmx_mtop_t *mtop, int gb_algorithm) -{ - int i, jj, natoms; - real rai, sk, doffset; - - t_atoms atoms; - gmx_genborn_t *born; - gmx_localtop_t *localtop; - - natoms = mtop->natoms; - - atoms = gmx_mtop_global_atoms(mtop); - localtop = gmx_mtop_generate_local_top(mtop, ir->efep != efepNO); - - snew(born, 1); - *p_born = born; - - born->nr = natoms; - - snew(born->drobc, natoms); - snew(born->bRad, natoms); - - /* Allocate memory for the global data arrays */ - snew(born->param_globalindex, natoms+3); - 
snew(born->gpol_globalindex, natoms+3); - snew(born->vsolv_globalindex, natoms+3); - snew(born->gb_radius_globalindex, natoms+3); - snew(born->use_globalindex, natoms+3); - - snew(fr->invsqrta, natoms); - snew(fr->dvda, natoms); - - fr->dadx = nullptr; - fr->dadx_rawptr = nullptr; - fr->nalloc_dadx = 0; - born->gpol_still_work = nullptr; - born->gpol_hct_work = nullptr; - - /* snew(born->asurf,natoms); */ - /* snew(born->dasurf,natoms); */ - - /* Initialize the gb neighbourlist */ - snew(fr->gblist, 1); - init_gb_nblist(natoms, fr->gblist); - - /* Do the Vsites exclusions (if any) */ - for (i = 0; i < natoms; i++) - { - jj = atoms.atom[i].type; - if (mtop->atomtypes.gb_radius[atoms.atom[i].type] > 0) - { - born->use_globalindex[i] = 1; - } - else - { - born->use_globalindex[i] = 0; - } - - /* If we have a Vsite, put vs_globalindex[i]=0 */ - if (C6 (fr->nbfp, fr->ntype, jj, jj) == 0 && - C12(fr->nbfp, fr->ntype, jj, jj) == 0 && - atoms.atom[i].q == 0) - { - born->use_globalindex[i] = 0; - } - } - - /* Copy algorithm parameters from inputrecord to local structure */ - born->obc_alpha = ir->gb_obc_alpha; - born->obc_beta = ir->gb_obc_beta; - born->obc_gamma = ir->gb_obc_gamma; - born->gb_doffset = ir->gb_dielectric_offset; - born->gb_epsilon_solvent = ir->gb_epsilon_solvent; - born->epsilon_r = ir->epsilon_r; - - doffset = born->gb_doffset; - - /* Set the surface tension */ - born->sa_surface_tension = ir->sa_surface_tension; - - /* If Still model, initialise the polarisation energies */ - if (gb_algorithm == egbSTILL) - { - init_gb_still(&(mtop->atomtypes), &(localtop->idef), &atoms, - born, natoms); - } - - - /* If HCT/OBC, precalculate the sk*atype->S_hct factors */ - else if (gb_algorithm == egbHCT || gb_algorithm == egbOBC) - { - - snew(born->gpol_hct_work, natoms+3); - - for (i = 0; i < natoms; i++) - { - if (born->use_globalindex[i] == 1) - { - rai = mtop->atomtypes.gb_radius[atoms.atom[i].type]-doffset; - sk = rai * mtop->atomtypes.S_hct[atoms.atom[i].type]; - 
born->param_globalindex[i] = sk; - born->gb_radius_globalindex[i] = rai; - } - else - { - born->param_globalindex[i] = 0; - born->gb_radius_globalindex[i] = 0; - } - } - } - - /* Allocate memory for work arrays for temporary use */ - snew(born->work, natoms+4); - snew(born->count, natoms); - snew(born->nblist_work, natoms); - - /* Domain decomposition specific stuff */ - born->nalloc = 0; - - return 0; -} - - - -static int -calc_gb_rad_still(t_commrec *cr, t_forcerec *fr, gmx_localtop_t *top, - rvec x[], t_nblist *nl, - gmx_genborn_t *born, t_mdatoms *md) -{ - int i, k, n, nj0, nj1, ai, aj; - int shift; - real shX, shY, shZ; - real gpi, dr2, idr4, rvdw, ratio, ccf, theta, term, rai, raj; - real ix1, iy1, iz1, jx1, jy1, jz1, dx11, dy11, dz11; - real rinv, idr2, idr6, vaj, dccf, cosq, sinq, prod, gpi2; - real factor; - real vai, prod_ai, icf4, icf6; - - factor = 0.5*ONE_4PI_EPS0; - n = 0; - - for (i = 0; i < born->nr; i++) - { - born->gpol_still_work[i] = 0; - } - - for (i = 0; i < nl->nri; i++) - { - ai = nl->iinr[i]; - - nj0 = nl->jindex[i]; - nj1 = nl->jindex[i+1]; - - /* Load shifts for this list */ - shift = nl->shift[i]; - shX = fr->shift_vec[shift][0]; - shY = fr->shift_vec[shift][1]; - shZ = fr->shift_vec[shift][2]; - - gpi = 0; - - rai = top->atomtypes.gb_radius[md->typeA[ai]]; - vai = born->vsolv[ai]; - prod_ai = STILL_P4*vai; - - /* Load atom i coordinates, add shift vectors */ - ix1 = shX + x[ai][0]; - iy1 = shY + x[ai][1]; - iz1 = shZ + x[ai][2]; - - for (k = nj0; k < nj1 && nl->jjnr[k] >= 0; k++) - { - aj = nl->jjnr[k]; - jx1 = x[aj][0]; - jy1 = x[aj][1]; - jz1 = x[aj][2]; - - dx11 = ix1-jx1; - dy11 = iy1-jy1; - dz11 = iz1-jz1; - - dr2 = dx11*dx11+dy11*dy11+dz11*dz11; - rinv = gmx::invsqrt(dr2); - idr2 = rinv*rinv; - idr4 = idr2*idr2; - idr6 = idr4*idr2; - - raj = top->atomtypes.gb_radius[md->typeA[aj]]; - - rvdw = rai + raj; - - ratio = dr2 / (rvdw * rvdw); - vaj = born->vsolv[aj]; - - if (ratio > STILL_P5INV) - { - ccf = 1.0; - dccf = 0.0; - } - else 
- { - theta = ratio*STILL_PIP5; - cosq = cos(theta); - term = 0.5*(1.0-cosq); - ccf = term*term; - sinq = 1.0 - cosq*cosq; - dccf = 2.0*term*sinq*gmx::invsqrt(sinq)*theta; - } - - prod = STILL_P4*vaj; - icf4 = ccf*idr4; - icf6 = (4*ccf-dccf)*idr6; - born->gpol_still_work[aj] += prod_ai*icf4; - gpi = gpi+prod*icf4; - - /* Save ai->aj and aj->ai chain rule terms */ - fr->dadx[n++] = prod*icf6; - fr->dadx[n++] = prod_ai*icf6; - } - born->gpol_still_work[ai] += gpi; - } - - /* Parallel summations */ - if (DOMAINDECOMP(cr)) - { - dd_atom_sum_real(cr->dd, born->gpol_still_work); - } - - /* Calculate the radii */ - for (i = 0; i < fr->natoms_force; i++) /* PELA born->nr */ - { - if (born->use[i] != 0) - { - gpi = born->gpol[i]+born->gpol_still_work[i]; - gpi2 = gpi * gpi; - born->bRad[i] = factor*gmx::invsqrt(gpi2); - fr->invsqrta[i] = gmx::invsqrt(born->bRad[i]); - } - } - - /* Extra communication required for DD */ - if (DOMAINDECOMP(cr)) - { - dd_atom_spread_real(cr->dd, born->bRad); - dd_atom_spread_real(cr->dd, fr->invsqrta); - } - - return 0; - -} - - -static int -calc_gb_rad_hct(t_commrec *cr, t_forcerec *fr, gmx_localtop_t *top, - rvec x[], t_nblist *nl, - gmx_genborn_t *born, t_mdatoms *md) -{ - int i, k, n, ai, aj, nj0, nj1; - int shift; - real shX, shY, shZ; - real rai, raj, dr2, dr, sk, sk_ai, sk2, sk2_ai, lij, uij, diff2, tmp, sum_ai; - real rad, min_rad, rinv, rai_inv; - real ix1, iy1, iz1, jx1, jy1, jz1, dx11, dy11, dz11; - real lij2, uij2, lij3, uij3, t1, t2, t3; - real lij_inv, dlij, sk2_rinv, prod, log_term; - real doffset, raj_inv, dadx_val; - real *gb_radius; - - doffset = born->gb_doffset; - gb_radius = born->gb_radius; - - for (i = 0; i < born->nr; i++) - { - born->gpol_hct_work[i] = 0; - } - - /* Keep the compiler happy */ - n = 0; - - for (i = 0; i < nl->nri; i++) - { - ai = nl->iinr[i]; - - nj0 = nl->jindex[i]; - nj1 = nl->jindex[i+1]; - - /* Load shifts for this list */ - shift = nl->shift[i]; - shX = fr->shift_vec[shift][0]; - shY = 
fr->shift_vec[shift][1]; - shZ = fr->shift_vec[shift][2]; - - rai = gb_radius[ai]; - rai_inv = 1.0/rai; - - sk_ai = born->param[ai]; - sk2_ai = sk_ai*sk_ai; - - /* Load atom i coordinates, add shift vectors */ - ix1 = shX + x[ai][0]; - iy1 = shY + x[ai][1]; - iz1 = shZ + x[ai][2]; - - sum_ai = 0; - - for (k = nj0; k < nj1 && nl->jjnr[k] >= 0; k++) - { - aj = nl->jjnr[k]; - - jx1 = x[aj][0]; - jy1 = x[aj][1]; - jz1 = x[aj][2]; - - dx11 = ix1 - jx1; - dy11 = iy1 - jy1; - dz11 = iz1 - jz1; - - dr2 = dx11*dx11+dy11*dy11+dz11*dz11; - rinv = gmx::invsqrt(dr2); - dr = rinv*dr2; - - sk = born->param[aj]; - raj = gb_radius[aj]; - - /* aj -> ai interaction */ - if (rai < dr+sk) - { - lij = 1.0/(dr-sk); - dlij = 1.0; - - if (rai > dr-sk) - { - lij = rai_inv; - dlij = 0.0; - } - - lij2 = lij*lij; - lij3 = lij2*lij; - - uij = 1.0/(dr+sk); - uij2 = uij*uij; - uij3 = uij2*uij; - - diff2 = uij2-lij2; - - lij_inv = gmx::invsqrt(lij2); - sk2 = sk*sk; - sk2_rinv = sk2*rinv; - prod = 0.25*sk2_rinv; - - log_term = std::log(uij*lij_inv); - - tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + - prod*(-diff2); - - if (rai < sk-dr) - { - tmp = tmp + 2.0 * (rai_inv-lij); - } - - t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr); - t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr); - t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv; - - dadx_val = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */ - /* fr->dadx[n++] = (dlij*t1+duij*t2+t3)*rinv; */ - /* rb2 is moved to chainrule */ - - sum_ai += 0.5*tmp; - } - else - { - dadx_val = 0.0; - } - fr->dadx[n++] = dadx_val; - - - /* ai -> aj interaction */ - if (raj < dr + sk_ai) - { - lij = 1.0/(dr-sk_ai); - dlij = 1.0; - raj_inv = 1.0/raj; - - if (raj > dr-sk_ai) - { - lij = raj_inv; - dlij = 0.0; - } - - lij2 = lij * lij; - lij3 = lij2 * lij; - - uij = 1.0/(dr+sk_ai); - uij2 = uij * uij; - uij3 = uij2 * uij; - - diff2 = uij2-lij2; - - lij_inv = gmx::invsqrt(lij2); - sk2 = sk2_ai; /* sk2_ai = sk_ai * sk_ai in i 
loop above */ - sk2_rinv = sk2*rinv; - prod = 0.25 * sk2_rinv; - - /* log_term = table_log(uij*lij_inv,born->log_table, - LOG_TABLE_ACCURACY); */ - log_term = std::log(uij*lij_inv); - - tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + - prod*(-diff2); - - if (raj < sk_ai-dr) - { - tmp = tmp + 2.0 * (raj_inv-lij); - } - - /* duij = 1.0 */ - t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr); - t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr); - t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv; - - dadx_val = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */ - /* fr->dadx[n++] = (dlij*t1+duij*t2+t3)*rinv; */ /* rb2 is moved to chainrule */ - - born->gpol_hct_work[aj] += 0.5*tmp; - } - else - { - dadx_val = 0.0; - } - fr->dadx[n++] = dadx_val; - } - - born->gpol_hct_work[ai] += sum_ai; - } - - /* Parallel summations */ - if (DOMAINDECOMP(cr)) - { - dd_atom_sum_real(cr->dd, born->gpol_hct_work); - } - - for (i = 0; i < fr->natoms_force; i++) /* PELA born->nr */ - { - if (born->use[i] != 0) - { - rai = top->atomtypes.gb_radius[md->typeA[i]]-doffset; - sum_ai = 1.0/rai - born->gpol_hct_work[i]; - min_rad = rai + doffset; - rad = 1.0/sum_ai; - - born->bRad[i] = std::max(rad, min_rad); - fr->invsqrta[i] = gmx::invsqrt(born->bRad[i]); - } - } - - /* Extra communication required for DD */ - if (DOMAINDECOMP(cr)) - { - dd_atom_spread_real(cr->dd, born->bRad); - dd_atom_spread_real(cr->dd, fr->invsqrta); - } - - - return 0; -} - -static int -calc_gb_rad_obc(t_commrec *cr, t_forcerec *fr, gmx_localtop_t *top, - rvec x[], t_nblist *nl, gmx_genborn_t *born, t_mdatoms *md) -{ - int i, k, ai, aj, nj0, nj1, n; - int shift; - real shX, shY, shZ; - real rai, raj, dr2, dr, sk, sk2, lij, uij, diff2, tmp, sum_ai; - real sum_ai2, sum_ai3, tsum, tchain, rinv, rai_inv, lij_inv, rai_inv2; - real log_term, prod, sk2_rinv, sk_ai, sk2_ai; - real ix1, iy1, iz1, jx1, jy1, jz1, dx11, dy11, dz11; - real lij2, uij2, lij3, uij3, dlij, t1, t2, t3; - real doffset, 
raj_inv, dadx_val; - real *gb_radius; - - /* Keep the compiler happy */ - n = 0; - - doffset = born->gb_doffset; - gb_radius = born->gb_radius; - - for (i = 0; i < born->nr; i++) - { - born->gpol_hct_work[i] = 0; - } - - for (i = 0; i < nl->nri; i++) - { - ai = nl->iinr[i]; - - nj0 = nl->jindex[i]; - nj1 = nl->jindex[i+1]; - - /* Load shifts for this list */ - shift = nl->shift[i]; - shX = fr->shift_vec[shift][0]; - shY = fr->shift_vec[shift][1]; - shZ = fr->shift_vec[shift][2]; - - rai = gb_radius[ai]; - rai_inv = 1.0/rai; - - sk_ai = born->param[ai]; - sk2_ai = sk_ai*sk_ai; - - /* Load atom i coordinates, add shift vectors */ - ix1 = shX + x[ai][0]; - iy1 = shY + x[ai][1]; - iz1 = shZ + x[ai][2]; - - sum_ai = 0; - - for (k = nj0; k < nj1 && nl->jjnr[k] >= 0; k++) - { - aj = nl->jjnr[k]; - - jx1 = x[aj][0]; - jy1 = x[aj][1]; - jz1 = x[aj][2]; - - dx11 = ix1 - jx1; - dy11 = iy1 - jy1; - dz11 = iz1 - jz1; - - dr2 = dx11*dx11+dy11*dy11+dz11*dz11; - rinv = gmx::invsqrt(dr2); - dr = dr2*rinv; - - /* sk is precalculated in init_gb() */ - sk = born->param[aj]; - raj = gb_radius[aj]; - - /* aj -> ai interaction */ - if (rai < dr+sk) - { - lij = 1.0/(dr-sk); - dlij = 1.0; - - if (rai > dr-sk) - { - lij = rai_inv; - dlij = 0.0; - } - - uij = 1.0/(dr+sk); - lij2 = lij * lij; - lij3 = lij2 * lij; - uij2 = uij * uij; - uij3 = uij2 * uij; - - diff2 = uij2-lij2; - - lij_inv = gmx::invsqrt(lij2); - sk2 = sk*sk; - sk2_rinv = sk2*rinv; - prod = 0.25*sk2_rinv; - - log_term = std::log(uij*lij_inv); - - tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2); - - if (rai < sk-dr) - { - tmp = tmp + 2.0 * (rai_inv-lij); - } - - /* duij = 1.0; */ - t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr); - t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr); - t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv; - - dadx_val = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */ - - sum_ai += 0.5*tmp; - } - else - { - dadx_val = 0.0; - } - fr->dadx[n++] = 
dadx_val; - - /* ai -> aj interaction */ - if (raj < dr + sk_ai) - { - lij = 1.0/(dr-sk_ai); - dlij = 1.0; - raj_inv = 1.0/raj; - - if (raj > dr-sk_ai) - { - lij = raj_inv; - dlij = 0.0; - } - - lij2 = lij * lij; - lij3 = lij2 * lij; - - uij = 1.0/(dr+sk_ai); - uij2 = uij * uij; - uij3 = uij2 * uij; - - diff2 = uij2-lij2; - - lij_inv = gmx::invsqrt(lij2); - sk2 = sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */ - sk2_rinv = sk2*rinv; - prod = 0.25 * sk2_rinv; - - /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */ - log_term = std::log(uij*lij_inv); - - tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2); - - if (raj < sk_ai-dr) - { - tmp = tmp + 2.0 * (raj_inv-lij); - } - - t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr); - t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr); - t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv; - - dadx_val = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */ - - born->gpol_hct_work[aj] += 0.5*tmp; - - } - else - { - dadx_val = 0.0; - } - fr->dadx[n++] = dadx_val; - - } - born->gpol_hct_work[ai] += sum_ai; - - } - - /* Parallel summations */ - if (DOMAINDECOMP(cr)) - { - dd_atom_sum_real(cr->dd, born->gpol_hct_work); - } - - for (i = 0; i < fr->natoms_force; i++) /* PELA born->nr */ - { - if (born->use[i] != 0) - { - rai = top->atomtypes.gb_radius[md->typeA[i]]; - rai_inv2 = 1.0/rai; - rai = rai-doffset; - rai_inv = 1.0/rai; - sum_ai = rai * born->gpol_hct_work[i]; - sum_ai2 = sum_ai * sum_ai; - sum_ai3 = sum_ai2 * sum_ai; - - tsum = tanh(born->obc_alpha*sum_ai-born->obc_beta*sum_ai2+born->obc_gamma*sum_ai3); - born->bRad[i] = rai_inv - tsum*rai_inv2; - born->bRad[i] = 1.0 / born->bRad[i]; - - fr->invsqrta[i] = gmx::invsqrt(born->bRad[i]); - - tchain = rai * (born->obc_alpha-2*born->obc_beta*sum_ai+3*born->obc_gamma*sum_ai2); - born->drobc[i] = (1.0-tsum*tsum)*tchain*rai_inv2; - } - } - - /* Extra (local) communication required for DD */ - if 
(DOMAINDECOMP(cr)) - { - dd_atom_spread_real(cr->dd, born->bRad); - dd_atom_spread_real(cr->dd, fr->invsqrta); - dd_atom_spread_real(cr->dd, born->drobc); - } - - return 0; - -} - - - -int calc_gb_rad(t_commrec *cr, t_forcerec *fr, t_inputrec *ir, gmx_localtop_t *top, - rvec x[], t_nblist *nl, gmx_genborn_t *born, t_mdatoms *md, t_nrnb *nrnb) -{ - int cnt; - int ndadx; - - if (fr->bAllvsAll && fr->dadx == nullptr) - { - /* We might need up to 8 atoms of padding before and after, - * and another 4 units to guarantee SSE alignment. - */ - fr->nalloc_dadx = 2*(md->homenr+12)*(md->nr/2+1+12); - snew(fr->dadx_rawptr, fr->nalloc_dadx); - fr->dadx = (real *) (((size_t) fr->dadx_rawptr + 16) & (~((size_t) 15))); - } - else - { - /* In the SSE-enabled gb-loops, when writing to dadx, we - * always write 2*4 elements at a time, even in the case with only - * 1-3 j particles, where we only really need to write 2*(1-3) - * elements. This is because we want dadx to be aligned to a 16- - * byte boundary, and being able to use _mm_store/load_ps - */ - ndadx = 2 * (nl->nrj + 3*nl->nri); - - /* First, reallocate the dadx array, we need 3 extra for SSE */ - if (ndadx + 3 > fr->nalloc_dadx) - { - fr->nalloc_dadx = over_alloc_large(ndadx) + 3; - srenew(fr->dadx_rawptr, fr->nalloc_dadx); - fr->dadx = (real *) (((size_t) fr->dadx_rawptr + 16) & (~((size_t) 15))); - } - } - - if (fr->bAllvsAll) - { - cnt = md->homenr*(md->nr/2+1); - - if (ir->gb_algorithm == egbSTILL) - { - genborn_allvsall_calc_still_radii(fr, md, born, top, x[0], &fr->AllvsAll_workgb); - /* 13 flops in outer loop, 47 flops in inner loop */ - inc_nrnb(nrnb, eNR_BORN_AVA_RADII_STILL, md->homenr*13+cnt*47); - } - else if (ir->gb_algorithm == egbHCT || ir->gb_algorithm == egbOBC) - { - genborn_allvsall_calc_hct_obc_radii(fr, md, born, ir->gb_algorithm, top, x[0], &fr->AllvsAll_workgb); - /* 24 flops in outer loop, 183 in inner */ - inc_nrnb(nrnb, eNR_BORN_AVA_RADII_HCT_OBC, md->homenr*24+cnt*183); - } - else - { - 
gmx_fatal(FARGS, "Bad gb algorithm for all-vs-all interactions"); - } - return 0; - } - - /* Switch for determining which algorithm to use for Born radii calculation */ -#if GMX_DOUBLE - - switch (ir->gb_algorithm) - { - case egbSTILL: - calc_gb_rad_still(cr, fr, top, x, nl, born, md); - break; - case egbHCT: - calc_gb_rad_hct(cr, fr, top, x, nl, born, md); - break; - case egbOBC: - calc_gb_rad_obc(cr, fr, top, x, nl, born, md); - break; - - default: - gmx_fatal(FARGS, "Unknown double precision algorithm for Born radii calculation: %d", ir->gb_algorithm); - } - -#else - - switch (ir->gb_algorithm) - { - case egbSTILL: - calc_gb_rad_still(cr, fr, top, x, nl, born, md); - break; - case egbHCT: - calc_gb_rad_hct(cr, fr, top, x, nl, born, md); - break; - case egbOBC: - calc_gb_rad_obc(cr, fr, top, x, nl, born, md); - break; - - default: - gmx_fatal(FARGS, "Unknown algorithm for Born radii calculation: %d", ir->gb_algorithm); - } - -#endif /* Double or single precision */ - - if (fr->bAllvsAll == FALSE) - { - switch (ir->gb_algorithm) - { - case egbSTILL: - /* 17 flops per outer loop iteration, 47 flops per inner loop */ - inc_nrnb(nrnb, eNR_BORN_RADII_STILL, nl->nri*17+nl->nrj*47); - break; - case egbHCT: - case egbOBC: - /* 61 (assuming 10 for tanh) flops for outer loop iteration, 183 flops per inner loop */ - inc_nrnb(nrnb, eNR_BORN_RADII_HCT_OBC, nl->nri*61+nl->nrj*183); - break; - - default: - break; - } - } - - return 0; -} - - - -real gb_bonds_tab(rvec x[], rvec f[], rvec fshift[], real *charge, real *p_gbtabscale, - real *invsqrta, real *dvda, real *GBtab, t_idef *idef, real epsilon_r, - real gb_epsilon_solvent, real facel, const t_pbc *pbc, const t_graph *graph) -{ - int i, j, n0, m, nnn, ai, aj; - int ki; - - real isai, isaj; - real r, rsq11; - real rinv11, iq; - real isaprod, qq, gbscale, gbtabscale, Y, F, Geps, Heps2, Fp, VV, FF, rt, eps, eps2; - real vgb, fgb, fijC, dvdatmp, fscal; - real vctot; - - rvec dx; - ivec dt; - - t_iatom *forceatoms; - - /* Scale 
the electrostatics by gb_epsilon_solvent */ - facel = facel * ((1.0/epsilon_r) - 1.0/gb_epsilon_solvent); - - gbtabscale = *p_gbtabscale; - vctot = 0.0; - - for (j = F_GB12; j <= F_GB14; j++) - { - forceatoms = idef->il[j].iatoms; - - for (i = 0; i < idef->il[j].nr; ) - { - /* To avoid reading in the interaction type, we just increment i to pass over - * the types in the forceatoms array, this saves some memory accesses - */ - i++; - ai = forceatoms[i++]; - aj = forceatoms[i++]; - - ki = pbc_rvec_sub(pbc, x[ai], x[aj], dx); - rsq11 = iprod(dx, dx); - - isai = invsqrta[ai]; - iq = (-1)*facel*charge[ai]; - - rinv11 = gmx::invsqrt(rsq11); - isaj = invsqrta[aj]; - isaprod = isai*isaj; - qq = isaprod*iq*charge[aj]; - gbscale = isaprod*gbtabscale; - r = rsq11*rinv11; - rt = r*gbscale; - n0 = static_cast(rt); - eps = rt-n0; - eps2 = eps*eps; - nnn = 4*n0; - Y = GBtab[nnn]; - F = GBtab[nnn+1]; - Geps = eps*GBtab[nnn+2]; - Heps2 = eps2*GBtab[nnn+3]; - Fp = F+Geps+Heps2; - VV = Y+eps*Fp; - FF = Fp+Geps+2.0*Heps2; - vgb = qq*VV; - fijC = qq*FF*gbscale; - dvdatmp = -(vgb+fijC*r)*0.5; - dvda[aj] = dvda[aj] + dvdatmp*isaj*isaj; - dvda[ai] = dvda[ai] + dvdatmp*isai*isai; - vctot = vctot + vgb; - fgb = -(fijC)*rinv11; - - if (graph) - { - ivec_sub(SHIFT_IVEC(graph, ai), SHIFT_IVEC(graph, aj), dt); - ki = IVEC2IS(dt); - } - - for (m = 0; (m < DIM); m++) /* 15 */ - { - fscal = fgb*dx[m]; - f[ai][m] += fscal; - f[aj][m] -= fscal; - fshift[ki][m] += fscal; - fshift[CENTRAL][m] -= fscal; - } - } - } - - return vctot; -} - -static real calc_gb_selfcorrections(t_commrec *cr, int natoms, - real *charge, gmx_genborn_t *born, real *dvda, double facel) -{ - int i, ai, at0, at1; - real rai, e, derb, q, q2, fi, rai_inv, vtot; - - if (DOMAINDECOMP(cr)) - { - at0 = 0; - at1 = cr->dd->nat_home; - } - else - { - at0 = 0; - at1 = natoms; - - } - - /* Scale the electrostatics by gb_epsilon_solvent */ - facel = facel * ((1.0/born->epsilon_r) - 1.0/born->gb_epsilon_solvent); - - vtot = 0.0; - - /* 
Apply self corrections */ - for (i = at0; i < at1; i++) - { - ai = i; - - if (born->use[ai] == 1) - { - rai = born->bRad[ai]; - rai_inv = 1.0/rai; - q = charge[ai]; - q2 = q*q; - fi = facel*q2; - e = fi*rai_inv; - derb = 0.5*e*rai_inv*rai_inv; - dvda[ai] += derb*rai; - vtot -= 0.5*e; - } - } - - return vtot; - -} - -static real calc_gb_nonpolar(t_commrec *cr, t_forcerec *fr, int natoms, gmx_genborn_t *born, gmx_localtop_t *top, - real *dvda, t_mdatoms *md) -{ - int ai, i, at0, at1; - real e, es, rai, term, probe, tmp, factor; - real rbi_inv, rbi_inv2; - - if (DOMAINDECOMP(cr)) - { - at0 = 0; - at1 = cr->dd->nat_home; - } - else - { - at0 = 0; - at1 = natoms; - } - - /* factor is the surface tension */ - factor = born->sa_surface_tension; - - es = 0; - probe = 0.14; - term = M_PI*4; - - for (i = at0; i < at1; i++) - { - ai = i; - - if (born->use[ai] == 1) - { - rai = top->atomtypes.gb_radius[md->typeA[ai]]; - rbi_inv = fr->invsqrta[ai]; - rbi_inv2 = rbi_inv * rbi_inv; - tmp = (rai*rbi_inv2)*(rai*rbi_inv2); - tmp = tmp*tmp*tmp; - e = factor*term*(rai+probe)*(rai+probe)*tmp; - dvda[ai] = dvda[ai] - 6*e*rbi_inv2; - es = es + e; - } - } - - return es; -} - - - -static real calc_gb_chainrule(int natoms, t_nblist *nl, real *dadx, real *dvda, rvec x[], rvec t[], rvec fshift[], - rvec shift_vec[], int gb_algorithm, gmx_genborn_t *born) -{ - int i, k, n, ai, aj, nj0, nj1, n0, n1; - int shift; - real shX, shY, shZ; - real fgb, rbi, fix1, fiy1, fiz1; - real ix1, iy1, iz1, jx1, jy1, jz1, dx11, dy11, dz11; - real tx, ty, tz, rbai, rbaj, fgb_ai; - real *rb; - - n = 0; - rb = born->work; - - n0 = 0; - n1 = natoms; - - if (gb_algorithm == egbSTILL) - { - for (i = n0; i < n1; i++) - { - rbi = born->bRad[i]; - rb[i] = (2 * rbi * rbi * dvda[i])/ONE_4PI_EPS0; - } - } - else if (gb_algorithm == egbHCT) - { - for (i = n0; i < n1; i++) - { - rbi = born->bRad[i]; - rb[i] = rbi * rbi * dvda[i]; - } - } - else if (gb_algorithm == egbOBC) - { - for (i = n0; i < n1; i++) - { - rbi = 
born->bRad[i]; - rb[i] = rbi * rbi * born->drobc[i] * dvda[i]; - } - } - - for (i = 0; i < nl->nri; i++) - { - ai = nl->iinr[i]; - - nj0 = nl->jindex[i]; - nj1 = nl->jindex[i+1]; - - /* Load shifts for this list */ - shift = nl->shift[i]; - shX = shift_vec[shift][0]; - shY = shift_vec[shift][1]; - shZ = shift_vec[shift][2]; - - /* Load atom i coordinates, add shift vectors */ - ix1 = shX + x[ai][0]; - iy1 = shY + x[ai][1]; - iz1 = shZ + x[ai][2]; - - fix1 = 0; - fiy1 = 0; - fiz1 = 0; - - rbai = rb[ai]; - - for (k = nj0; k < nj1 && nl->jjnr[k] >= 0; k++) - { - aj = nl->jjnr[k]; - - jx1 = x[aj][0]; - jy1 = x[aj][1]; - jz1 = x[aj][2]; - - dx11 = ix1 - jx1; - dy11 = iy1 - jy1; - dz11 = iz1 - jz1; - - rbaj = rb[aj]; - - fgb = rbai*dadx[n++]; - fgb_ai = rbaj*dadx[n++]; - - /* Total force between ai and aj is the sum of ai->aj and aj->ai */ - fgb = fgb + fgb_ai; - - tx = fgb * dx11; - ty = fgb * dy11; - tz = fgb * dz11; - - fix1 = fix1 + tx; - fiy1 = fiy1 + ty; - fiz1 = fiz1 + tz; - - /* Update force on atom aj */ - t[aj][0] = t[aj][0] - tx; - t[aj][1] = t[aj][1] - ty; - t[aj][2] = t[aj][2] - tz; - } - - /* Update force and shift forces on atom ai */ - t[ai][0] = t[ai][0] + fix1; - t[ai][1] = t[ai][1] + fiy1; - t[ai][2] = t[ai][2] + fiz1; - - fshift[shift][0] = fshift[shift][0] + fix1; - fshift[shift][1] = fshift[shift][1] + fiy1; - fshift[shift][2] = fshift[shift][2] + fiz1; - - } - - return 0; -} - - -void -calc_gb_forces(t_commrec *cr, t_mdatoms *md, gmx_genborn_t *born, gmx_localtop_t *top, - rvec x[], rvec f[], t_forcerec *fr, t_idef *idef, int gb_algorithm, int sa_algorithm, t_nrnb *nrnb, - const t_pbc *pbc, const t_graph *graph, gmx_enerdata_t *enerd) -{ - int cnt; - - /* PBC or not? 
*/ - const t_pbc *pbc_null; - - if (fr->bMolPBC) - { - pbc_null = pbc; - } - else - { - pbc_null = nullptr; - } - - if (sa_algorithm == esaAPPROX) - { - /* Do a simple ACE type approximation for the non-polar solvation */ - enerd->term[F_NPSOLVATION] += calc_gb_nonpolar(cr, fr, born->nr, born, top, fr->dvda, md); - } - - /* Calculate the bonded GB-interactions using either table or analytical formula */ - enerd->term[F_GBPOL] += gb_bonds_tab(x, f, fr->fshift, md->chargeA, &(fr->gbtabscale), - fr->invsqrta, fr->dvda, fr->gbtab->data, idef, born->epsilon_r, born->gb_epsilon_solvent, fr->ic->epsfac, pbc_null, graph); - - /* Calculate self corrections to the GB energies - currently only A state used! (FIXME) */ - enerd->term[F_GBPOL] += calc_gb_selfcorrections(cr, born->nr, md->chargeA, born, fr->dvda, fr->ic->epsfac); - - /* If parallel, sum the derivative of the potential w.r.t the born radii */ - if (DOMAINDECOMP(cr)) - { - dd_atom_sum_real(cr->dd, fr->dvda); - dd_atom_spread_real(cr->dd, fr->dvda); - } - - if (fr->bAllvsAll) - { - genborn_allvsall_calc_chainrule(fr, md, born, x[0], f[0], gb_algorithm, fr->AllvsAll_workgb); - cnt = md->homenr*(md->nr/2+1); - /* 9 flops for outer loop, 15 for inner */ - inc_nrnb(nrnb, eNR_BORN_AVA_CHAINRULE, md->homenr*9+cnt*15); - return; - } - - calc_gb_chainrule(fr->natoms_force, fr->gblist, fr->dadx, fr->dvda, - x, f, fr->fshift, fr->shift_vec, gb_algorithm, born); - - if (!fr->bAllvsAll) - { - /* 9 flops for outer loop, 15 for inner */ - inc_nrnb(nrnb, eNR_BORN_CHAINRULE, fr->gblist->nri*9+fr->gblist->nrj*15); - } -} - -static void add_j_to_gblist(gbtmpnbl_t *list, int aj) -{ - if (list->naj >= list->aj_nalloc) - { - list->aj_nalloc = over_alloc_large(list->naj+1); - srenew(list->aj, list->aj_nalloc); - } - - list->aj[list->naj++] = aj; -} - -static gbtmpnbl_t *find_gbtmplist(struct gbtmpnbls *lists, int shift) -{ - int ind, i; - - /* Search the list with the same shift, if there is one */ - ind = 0; - while (ind < lists->nlist 
&& shift != lists->list[ind].shift) - { - ind++; - } - if (ind == lists->nlist) - { - if (lists->nlist == lists->list_nalloc) - { - lists->list_nalloc++; - srenew(lists->list, lists->list_nalloc); - for (i = lists->nlist; i < lists->list_nalloc; i++) - { - lists->list[i].aj = nullptr; - lists->list[i].aj_nalloc = 0; - } - - } - - lists->list[lists->nlist].shift = shift; - lists->list[lists->nlist].naj = 0; - lists->nlist++; - } - - return &lists->list[ind]; -} - -static void add_bondeds_to_gblist(t_ilist *il, - gmx_bool bMolPBC, t_pbc *pbc, t_graph *g, rvec *x, - struct gbtmpnbls *nls) -{ - int ind, j, ai, aj, found; - rvec dx; - ivec dt; - gbtmpnbl_t *list; - - for (ind = 0; ind < il->nr; ind += 3) - { - ai = il->iatoms[ind+1]; - aj = il->iatoms[ind+2]; - - int shift = CENTRAL; - if (g != nullptr) - { - rvec_sub(x[ai], x[aj], dx); - ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt); - shift = IVEC2IS(dt); - } - else if (bMolPBC) - { - shift = pbc_dx_aiuc(pbc, x[ai], x[aj], dx); - } - - /* Find the list for this shift or create one */ - list = find_gbtmplist(&nls[ai], shift); - - found = 0; - - /* So that we do not add the same bond twice. 
- * This happens with some constraints between 1-3 atoms - * that are in the bond-list but should not be in the GB nb-list */ - for (j = 0; j < list->naj; j++) - { - if (list->aj[j] == aj) - { - found = 1; - } - } - - if (found == 0) - { - if (ai == aj) - { - gmx_incons("ai == aj"); - } - - add_j_to_gblist(list, aj); - } - } -} - - -int make_gb_nblist(t_commrec *cr, int gb_algorithm, - rvec x[], matrix box, - t_forcerec *fr, t_idef *idef, t_graph *graph, gmx_genborn_t *born) -{ - int i, j, k, n, nj0, nj1, ai, shift, s; - t_nblist *nblist; - t_pbc pbc; - - struct gbtmpnbls *nls; - gbtmpnbl_t *list = nullptr; - - set_pbc(&pbc, fr->ePBC, box); - nls = born->nblist_work; - - for (i = 0; i < born->nr; i++) - { - nls[i].nlist = 0; - } - - if (fr->bMolPBC) - { - set_pbc_dd(&pbc, fr->ePBC, cr->dd->nc, TRUE, box); - } - - switch (gb_algorithm) - { - case egbHCT: - case egbOBC: - /* Loop over 1-2, 1-3 and 1-4 interactions */ - for (j = F_GB12; j <= F_GB14; j++) - { - add_bondeds_to_gblist(&idef->il[j], fr->bMolPBC, &pbc, graph, x, nls); - } - break; - case egbSTILL: - /* Loop over 1-4 interactions */ - add_bondeds_to_gblist(&idef->il[F_GB14], fr->bMolPBC, &pbc, graph, x, nls); - break; - default: - gmx_incons("Unknown GB algorithm"); - } - - /* Loop over the VDWQQ and VDW nblists to set up the nonbonded part of the GB list */ - for (n = 0; (n < fr->nnblists); n++) - { - for (i = 0; (i < eNL_NR); i++) - { - nblist = &(fr->nblists[n].nlist_sr[i]); - - if (nblist->nri > 0 && (i == eNL_VDWQQ || i == eNL_QQ)) - { - for (j = 0; j < nblist->nri; j++) - { - ai = nblist->iinr[j]; - shift = nblist->shift[j]; - - /* Find the list for this shift or create one */ - list = find_gbtmplist(&nls[ai], shift); - - nj0 = nblist->jindex[j]; - nj1 = nblist->jindex[j+1]; - - /* Add all the j-atoms in the non-bonded list to the GB list */ - for (k = nj0; k < nj1; k++) - { - add_j_to_gblist(list, nblist->jjnr[k]); - } - } - } - } - } - - /* Zero out some counters */ - fr->gblist->nri = 0; - 
fr->gblist->nrj = 0; - - fr->gblist->jindex[0] = fr->gblist->nri; - - for (i = 0; i < fr->natoms_force; i++) - { - for (s = 0; s < nls[i].nlist; s++) - { - list = &nls[i].list[s]; - - /* Only add those atoms that actually have neighbours */ - if (born->use[i] != 0) - { - fr->gblist->iinr[fr->gblist->nri] = i; - fr->gblist->shift[fr->gblist->nri] = list->shift; - fr->gblist->nri++; - - for (k = 0; k < list->naj; k++) - { - /* Memory allocation for jjnr */ - if (fr->gblist->nrj >= fr->gblist->maxnrj) - { - fr->gblist->maxnrj += over_alloc_large(fr->gblist->maxnrj); - - if (debug) - { - fprintf(debug, "Increasing GB neighbourlist j size to %d\n", fr->gblist->maxnrj); - } - - srenew(fr->gblist->jjnr, fr->gblist->maxnrj); - } - - /* Put in list */ - if (i == list->aj[k]) - { - gmx_incons("i == list->aj[k]"); - } - fr->gblist->jjnr[fr->gblist->nrj++] = list->aj[k]; - } - - fr->gblist->jindex[fr->gblist->nri] = fr->gblist->nrj; - } - } - } - - return 0; -} - -void make_local_gb(const t_commrec *cr, gmx_genborn_t *born, int gb_algorithm) -{ - int i, at0, at1; - gmx_domdec_t *dd = nullptr; - - if (DOMAINDECOMP(cr)) - { - dd = cr->dd; - at0 = 0; - at1 = dd->nat_tot; - } - else - { - /* Single node, just copy pointers and return */ - if (gb_algorithm == egbSTILL) - { - born->gpol = born->gpol_globalindex; - born->vsolv = born->vsolv_globalindex; - born->gb_radius = born->gb_radius_globalindex; - } - else - { - born->param = born->param_globalindex; - born->gb_radius = born->gb_radius_globalindex; - } - - born->use = born->use_globalindex; - - return; - } - - /* Reallocation of local arrays if necessary */ - /* fr->natoms_force is equal to dd->nat_tot */ - if (DOMAINDECOMP(cr) && dd->nat_tot > born->nalloc) - { - int nalloc; - - nalloc = dd->nat_tot; - - /* Arrays specific to different gb algorithms */ - if (gb_algorithm == egbSTILL) - { - srenew(born->gpol, nalloc+3); - srenew(born->vsolv, nalloc+3); - srenew(born->gb_radius, nalloc+3); - for (i = born->nalloc; (i < 
nalloc+3); i++) - { - born->gpol[i] = 0; - born->vsolv[i] = 0; - born->gb_radius[i] = 0; - } - } - else - { - srenew(born->param, nalloc+3); - srenew(born->gb_radius, nalloc+3); - for (i = born->nalloc; (i < nalloc+3); i++) - { - born->param[i] = 0; - born->gb_radius[i] = 0; - } - } - - /* All gb-algorithms use the array for vsites exclusions */ - srenew(born->use, nalloc+3); - for (i = born->nalloc; (i < nalloc+3); i++) - { - born->use[i] = 0; - } - - born->nalloc = nalloc; - } - - /* With dd, copy algorithm specific arrays */ - if (gb_algorithm == egbSTILL) - { - for (i = at0; i < at1; i++) - { - born->gpol[i] = born->gpol_globalindex[dd->gatindex[i]]; - born->vsolv[i] = born->vsolv_globalindex[dd->gatindex[i]]; - born->gb_radius[i] = born->gb_radius_globalindex[dd->gatindex[i]]; - born->use[i] = born->use_globalindex[dd->gatindex[i]]; - } - } - else - { - for (i = at0; i < at1; i++) - { - born->param[i] = born->param_globalindex[dd->gatindex[i]]; - born->gb_radius[i] = born->gb_radius_globalindex[dd->gatindex[i]]; - born->use[i] = born->use_globalindex[dd->gatindex[i]]; - } - } -} diff --git a/src/gromacs/mdlib/genborn.h b/src/gromacs/mdlib/genborn.h deleted file mode 100644 index a631788a0d..0000000000 --- a/src/gromacs/mdlib/genborn.h +++ /dev/null @@ -1,170 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 1991-2000, University of Groningen, The Netherlands. - * Copyright (c) 2001-2008, The GROMACS development team. - * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. 
- * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. 
- */ -#ifndef GMX_MDLIB_GENBORN_H -#define GMX_MDLIB_GENBORN_H - -#include "gromacs/math/utilities.h" -#include "gromacs/math/vectypes.h" - -struct gmx_genborn_t; -struct gmx_enerdata_t; -struct gmx_localtop_t; -struct gmx_mtop_t; -struct t_commrec; -struct t_forcerec; -struct t_graph; -struct t_idef; -struct t_inputrec; -struct t_mdatoms; -struct t_nblist; -struct t_nrnb; -struct t_pbc; - -typedef struct -{ - int nbonds; - int bond[10]; - real length[10]; -} genborn_bonds_t; - -typedef struct gbtmpnbls *gbtmpnbls_t; - -/* Struct to hold all the information for GB */ -typedef struct gmx_genborn_t -{ - int nr; /* number of atoms, length of arrays below */ - int n12; /* number of 1-2 (bond) interactions */ - int n13; /* number of 1-3 (angle) terms */ - int n14; /* number of 1-4 (torsion) terms */ - int nalloc; /* Allocation of local arrays (with DD) */ - - - /* Arrays below that end with _globalindex are used for setting up initial values of - * all gb parameters and values. They all have length natoms, which for DD is the - * global atom number. 
- * Values are then taken from these arrays to local copies, that have names without - * _globalindex, in the routine make_local_gb(), which is called once for single - * node runs, and for DD at every call to dd_partition_system - */ - - real *gpol; /* Atomic polarisation energies */ - real *gpol_globalindex; /* */ - real *gpol_still_work; /* Work array for Still model */ - real *gpol_hct_work; /* Work array for HCT/OBC models */ - real *bRad; /* Atomic Born radii */ - real *vsolv; /* Atomic solvation volumes */ - real *vsolv_globalindex; /* */ - real *gb_radius; /* Radius info, copied from atomtypes */ - real *gb_radius_globalindex; - - int *use; /* Array that till if this atom does GB */ - int *use_globalindex; /* Global array for parallelization */ - - real es; /* Solvation energy and derivatives */ - real *asurf; /* Atomic surface area */ - rvec *dasurf; /* Surface area derivatives */ - real as; /* Total surface area */ - - real *drobc; /* Parameters for OBC chain rule calculation */ - real *param; /* Precomputed factor rai*atype->S_hct for HCT/OBC */ - real *param_globalindex; /* */ - - real *log_table; /* Table for logarithm lookup */ - - real obc_alpha; /* OBC parameters */ - real obc_beta; /* OBC parameters */ - real obc_gamma; /* OBC parameters */ - real gb_doffset; /* Dielectric offset for Still/HCT/OBC */ - real gb_epsilon_solvent; /* */ - real epsilon_r; /* Used for inner dielectric */ - - real sa_surface_tension; /* Surface tension for non-polar solvation */ - - real *work; /* Used for parallel summation and in the chain rule, length natoms */ - real *buf; /* Used for parallel summation and in the chain rule, length natoms */ - int *count; /* Used for setting up the special gb nblist, length natoms */ - gbtmpnbls_t nblist_work; /* Used for setting up the special gb nblist, dim natoms*nblist_work_nalloc */ - int nblist_work_nalloc; /* Length of second dimension of nblist_work */ -} -gmx_genborn_t; -/* Still parameters - make sure to edit in 
genborn_sse.c too if you change these! */ -#define STILL_P1 0.073*0.1 /* length */ -#define STILL_P2 0.921*0.1*CAL2JOULE /* energy*length */ -#define STILL_P3 6.211*0.1*CAL2JOULE /* energy*length */ -#define STILL_P4 15.236*0.1*CAL2JOULE -#define STILL_P5 1.254 - -#define STILL_P5INV (1.0/STILL_P5) -#define STILL_PIP5 (M_PI*STILL_P5) - - -/* Initialise GB stuff */ -int init_gb(struct gmx_genborn_t **p_born, - struct t_forcerec *fr, const struct t_inputrec *ir, - const gmx_mtop_t *mtop, int gb_algorithm); - - -/* Born radii calculations, both with and without SSE acceleration */ -int calc_gb_rad(struct t_commrec *cr, struct t_forcerec *fr, struct t_inputrec *ir, gmx_localtop_t *top, rvec x[], t_nblist *nl, struct gmx_genborn_t *born, t_mdatoms *md, t_nrnb *nrnb); - - - -/* Bonded GB interactions */ -real gb_bonds_tab(rvec x[], rvec f[], rvec fshift[], real *charge, real *p_gbtabscale, - real *invsqrta, real *dvda, real *GBtab, t_idef *idef, real epsilon_r, - real gb_epsilon_solvent, real facel, const struct t_pbc *pbc, - const struct t_graph *graph); - - - - -/* Functions for calculating adjustments due to ie chain rule terms */ -void -calc_gb_forces(struct t_commrec *cr, t_mdatoms *md, struct gmx_genborn_t *born, gmx_localtop_t *top, - rvec x[], rvec f[], struct t_forcerec *fr, t_idef *idef, int gb_algorithm, int sa_algorithm, t_nrnb *nrnb, - const struct t_pbc *pbc, const struct t_graph *graph, struct gmx_enerdata_t *enerd); - - -int -make_gb_nblist(struct t_commrec *cr, int gb_algorithm, - rvec x[], matrix box, - struct t_forcerec *fr, t_idef *idef, struct t_graph *graph, struct gmx_genborn_t *born); - -void -make_local_gb(const struct t_commrec *cr, struct gmx_genborn_t *born, int gb_algorithm); - -#endif diff --git a/src/gromacs/mdlib/genborn_allvsall.cpp b/src/gromacs/mdlib/genborn_allvsall.cpp deleted file mode 100644 index 1d96860895..0000000000 --- a/src/gromacs/mdlib/genborn_allvsall.cpp +++ /dev/null @@ -1,1108 +0,0 @@ -/* - * This file is part of the 
GROMACS molecular simulation package. - * - * Copyright (c) 1991-2000, University of Groningen, The Netherlands. - * Copyright (c) 2001-2009, The GROMACS Development Team. - * Copyright (c) 2010,2014,2015,2017, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. 
- */ -#include "gmxpre.h" - -#include "genborn_allvsall.h" - -#include - -#include - -#include "gromacs/gmxlib/network.h" -#include "gromacs/math/functions.h" -#include "gromacs/math/units.h" -#include "gromacs/math/vec.h" -#include "gromacs/mdlib/genborn.h" -#include "gromacs/mdtypes/forcerec.h" -#include "gromacs/mdtypes/md_enums.h" -#include "gromacs/mdtypes/mdatom.h" -#include "gromacs/topology/topology.h" -#include "gromacs/utility/smalloc.h" - - -typedef struct -{ - int * jindex_gb; - int ** exclusion_mask_gb; -} -gmx_allvsallgb2_data_t; - -static int -calc_maxoffset(int i, int natoms) -{ - int maxoffset; - - if ((natoms % 2) == 1) - { - /* Odd number of atoms, easy */ - maxoffset = natoms/2; - } - else if ((natoms % 4) == 0) - { - /* Multiple of four is hard */ - if (i < natoms/2) - { - if ((i % 2) == 0) - { - maxoffset = natoms/2; - } - else - { - maxoffset = natoms/2-1; - } - } - else - { - if ((i % 2) == 1) - { - maxoffset = natoms/2; - } - else - { - maxoffset = natoms/2-1; - } - } - } - else - { - /* natoms/2 = odd */ - if ((i % 2) == 0) - { - maxoffset = natoms/2; - } - else - { - maxoffset = natoms/2-1; - } - } - - return maxoffset; -} - -static void -setup_gb_exclusions_and_indices(gmx_allvsallgb2_data_t * aadata, - t_ilist * ilist, - int natoms, - gmx_bool bInclude12, - gmx_bool bInclude13, - gmx_bool bInclude14) -{ - int i, j, k; - int a1, a2; - int max_offset; - int max_excl_offset; - - /* This routine can appear to be a bit complex, but it is mostly book-keeping. - * To enable the fast all-vs-all kernel we need to be able to stream through all coordinates - * whether they should interact or not. - * - * To avoid looping over the exclusions, we create a simple mask that is 1 if the interaction - * should be present, otherwise 0. Since exclusions typically only occur when i & j are close, - * we create a jindex array with three elements per i atom: the starting point, the point to - * which we need to check exclusions, and the end point. 
- * This way we only have to allocate a short exclusion mask per i atom. - */ - - /* Allocate memory for jindex arrays */ - snew(aadata->jindex_gb, 3*natoms); - - /* Pointer to lists with exclusion masks */ - snew(aadata->exclusion_mask_gb, natoms); - - for (i = 0; i < natoms; i++) - { - /* Start */ - aadata->jindex_gb[3*i] = i+1; - max_offset = calc_maxoffset(i, natoms); - - /* first check the max range of atoms to EXCLUDE */ - max_excl_offset = 0; - if (!bInclude12) - { - for (j = 0; j < ilist[F_GB12].nr; j += 3) - { - a1 = ilist[F_GB12].iatoms[j+1]; - a2 = ilist[F_GB12].iatoms[j+2]; - - if (a1 == i) - { - k = a2-a1; - } - else if (a2 == i) - { - k = a1+natoms-a2; - } - else - { - continue; - } - if (k > 0 && k <= max_offset) - { - max_excl_offset = std::max(k, max_excl_offset); - } - } - } - if (!bInclude13) - { - for (j = 0; j < ilist[F_GB13].nr; j += 3) - { - a1 = ilist[F_GB13].iatoms[j+1]; - a2 = ilist[F_GB13].iatoms[j+2]; - - - if (a1 == i) - { - k = a2-a1; - } - else if (a2 == i) - { - k = a1+natoms-a2; - } - else - { - continue; - } - if (k > 0 && k <= max_offset) - { - max_excl_offset = std::max(k, max_excl_offset); - } - } - } - if (!bInclude14) - { - for (j = 0; j < ilist[F_GB14].nr; j += 3) - { - a1 = ilist[F_GB14].iatoms[j+1]; - a2 = ilist[F_GB14].iatoms[j+2]; - - - if (a1 == i) - { - k = a2-a1; - } - else if (a2 == i) - { - k = a1+natoms-a2; - } - else - { - continue; - } - if (k > 0 && k <= max_offset) - { - max_excl_offset = std::max(k, max_excl_offset); - } - } - } - max_excl_offset = std::min(max_offset, max_excl_offset); - - aadata->jindex_gb[3*i+1] = i+1+max_excl_offset; - - snew(aadata->exclusion_mask_gb[i], max_excl_offset); - - /* Include everything by default */ - for (j = 0; j < max_excl_offset; j++) - { - /* Use all-ones to mark interactions that should be present, compatible with SSE */ - aadata->exclusion_mask_gb[i][j] = 0xFFFFFFFF; - } - /* Go through exclusions again */ - if (!bInclude12) - { - for (j = 0; j < ilist[F_GB12].nr; j += 
3) - { - a1 = ilist[F_GB12].iatoms[j+1]; - a2 = ilist[F_GB12].iatoms[j+2]; - - if (a1 == i) - { - k = a2-a1; - } - else if (a2 == i) - { - k = a1+natoms-a2; - } - else - { - continue; - } - if (k > 0 && k <= max_offset) - { - aadata->exclusion_mask_gb[i][k-1] = 0; - } - } - } - if (!bInclude13) - { - for (j = 0; j < ilist[F_GB13].nr; j += 3) - { - a1 = ilist[F_GB13].iatoms[j+1]; - a2 = ilist[F_GB13].iatoms[j+2]; - - if (a1 == i) - { - k = a2-a1; - } - else if (a2 == i) - { - k = a1+natoms-a2; - } - else - { - continue; - } - if (k > 0 && k <= max_offset) - { - aadata->exclusion_mask_gb[i][k-1] = 0; - } - } - } - if (!bInclude14) - { - for (j = 0; j < ilist[F_GB14].nr; j += 3) - { - a1 = ilist[F_GB14].iatoms[j+1]; - a2 = ilist[F_GB14].iatoms[j+2]; - - if (a1 == i) - { - k = a2-a1; - } - else if (a2 == i) - { - k = a1+natoms-a2; - } - else - { - continue; - } - if (k > 0 && k <= max_offset) - { - aadata->exclusion_mask_gb[i][k-1] = 0; - } - } - } - - /* End */ - - /* End */ - aadata->jindex_gb[3*i+2] = i+1+max_offset; - } -} - - -static void -genborn_allvsall_setup(gmx_allvsallgb2_data_t ** p_aadata, - t_ilist * ilist, - int natoms, - gmx_bool bInclude12, - gmx_bool bInclude13, - gmx_bool bInclude14) -{ - gmx_allvsallgb2_data_t *aadata; - - snew(aadata, 1); - *p_aadata = aadata; - - setup_gb_exclusions_and_indices(aadata, ilist, natoms, bInclude12, bInclude13, bInclude14); -} - - - -int -genborn_allvsall_calc_still_radii(t_forcerec * fr, - t_mdatoms * mdatoms, - gmx_genborn_t * born, - gmx_localtop_t * top, - real * x, - void * work) -{ - gmx_allvsallgb2_data_t *aadata; - int natoms; - int ni0, ni1; - int nj0, nj1, nj2; - int i, j, k, n; - int * mask; - - real ix, iy, iz; - real jx, jy, jz; - real dx, dy, dz; - real rsq, rinv; - real gpi, rai, vai; - real prod_ai; - real irsq, idr4, idr6; - real raj, rvdw, ratio; - real vaj, ccf, dccf, theta, cosq; - real term, prod, icf4, icf6, gpi2, factor, sinq; - - natoms = mdatoms->nr; - ni0 = 0; - ni1 = mdatoms->homenr; - 
factor = 0.5*ONE_4PI_EPS0; - n = 0; - - aadata = *((gmx_allvsallgb2_data_t **)work); - - if (aadata == nullptr) - { - genborn_allvsall_setup(&aadata, top->idef.il, mdatoms->nr, - FALSE, FALSE, TRUE); - *((gmx_allvsallgb2_data_t **)work) = aadata; - } - - - for (i = 0; i < born->nr; i++) - { - born->gpol_still_work[i] = 0; - } - - - for (i = ni0; i < ni1; i++) - { - /* We assume shifts are NOT used for all-vs-all interactions */ - - /* Load i atom data */ - ix = x[3*i]; - iy = x[3*i+1]; - iz = x[3*i+2]; - - gpi = 0.0; - - rai = top->atomtypes.gb_radius[mdatoms->typeA[i]]; - vai = born->vsolv[i]; - prod_ai = STILL_P4*vai; - - /* Load limits for loop over neighbors */ - nj0 = aadata->jindex_gb[3*i]; - nj1 = aadata->jindex_gb[3*i+1]; - nj2 = aadata->jindex_gb[3*i+2]; - - mask = aadata->exclusion_mask_gb[i]; - - /* Prologue part, including exclusion mask */ - for (j = nj0; j < nj1; j++, mask++) - { - if (*mask != 0) - { - k = j%natoms; - - /* load j atom coordinates */ - jx = x[3*k]; - jy = x[3*k+1]; - jz = x[3*k+2]; - - /* Calculate distance */ - dx = ix - jx; - dy = iy - jy; - dz = iz - jz; - rsq = dx*dx+dy*dy+dz*dz; - - /* Calculate 1/r and 1/r2 */ - rinv = gmx::invsqrt(rsq); - irsq = rinv*rinv; - idr4 = irsq*irsq; - idr6 = idr4*irsq; - - raj = top->atomtypes.gb_radius[mdatoms->typeA[k]]; - - rvdw = rai + raj; - - ratio = rsq / (rvdw * rvdw); - vaj = born->vsolv[k]; - - - if (ratio > STILL_P5INV) - { - ccf = 1.0; - dccf = 0.0; - } - else - { - theta = ratio*STILL_PIP5; - cosq = cos(theta); - term = 0.5*(1.0-cosq); - ccf = term*term; - sinq = 1.0 - cosq*cosq; - dccf = 2.0*term*sinq*gmx::invsqrt(sinq)*theta; - } - - prod = STILL_P4*vaj; - icf4 = ccf*idr4; - icf6 = (4*ccf-dccf)*idr6; - - born->gpol_still_work[k] += prod_ai*icf4; - gpi = gpi+prod*icf4; - - /* Save ai->aj and aj->ai chain rule terms */ - fr->dadx[n++] = prod*icf6; - fr->dadx[n++] = prod_ai*icf6; - - /* 27 flops, plus one cos(x) - estimate at 20 flops => 47 */ - - } - } - - /* Main part, no exclusions */ - 
for (j = nj1; j < nj2; j++) - { - k = j%natoms; - - /* load j atom coordinates */ - jx = x[3*k]; - jy = x[3*k+1]; - jz = x[3*k+2]; - - /* Calculate distance */ - dx = ix - jx; - dy = iy - jy; - dz = iz - jz; - rsq = dx*dx+dy*dy+dz*dz; - - /* Calculate 1/r and 1/r2 */ - rinv = gmx::invsqrt(rsq); - irsq = rinv*rinv; - idr4 = irsq*irsq; - idr6 = idr4*irsq; - - raj = top->atomtypes.gb_radius[mdatoms->typeA[k]]; - - rvdw = rai + raj; - - ratio = rsq / (rvdw * rvdw); - vaj = born->vsolv[k]; - - if (ratio > STILL_P5INV) - { - ccf = 1.0; - dccf = 0.0; - } - else - { - theta = ratio*STILL_PIP5; - cosq = cos(theta); - term = 0.5*(1.0-cosq); - ccf = term*term; - sinq = 1.0 - cosq*cosq; - dccf = 2.0*term*sinq*gmx::invsqrt(sinq)*theta; - } - - prod = STILL_P4*vaj; - icf4 = ccf*idr4; - icf6 = (4*ccf-dccf)*idr6; - - born->gpol_still_work[k] += prod_ai*icf4; - gpi = gpi+prod*icf4; - - /* Save ai->aj and aj->ai chain rule terms */ - fr->dadx[n++] = prod*icf6; - fr->dadx[n++] = prod_ai*icf6; - } - born->gpol_still_work[i] += gpi; - } - - /* Parallel summations would go here if ever implemented with DD */ - - /* Calculate the radii */ - for (i = 0; i < natoms; i++) - { - if (born->use[i] != 0) - { - gpi = born->gpol[i]+born->gpol_still_work[i]; - gpi2 = gpi * gpi; - born->bRad[i] = factor*gmx::invsqrt(gpi2); - fr->invsqrta[i] = gmx::invsqrt(born->bRad[i]); - } - } - - return 0; -} - - - -int -genborn_allvsall_calc_hct_obc_radii(t_forcerec * fr, - t_mdatoms * mdatoms, - gmx_genborn_t * born, - int gb_algorithm, - gmx_localtop_t * top, - real * x, - void * work) -{ - gmx_allvsallgb2_data_t *aadata; - int natoms; - int ni0, ni1; - int nj0, nj1, nj2; - int i, j, k, n; - int * mask; - - real ix, iy, iz; - real jx, jy, jz; - real dx, dy, dz; - real rsq, rinv; - real prod, raj; - real rai, doffset, rai_inv, rai_inv2, sk_ai, sk2_ai, sum_ai; - real dr, sk, lij, dlij, lij2, lij3, uij2, uij3, diff2, uij, log_term; - real lij_inv, sk2, sk2_rinv, tmp, t1, t2, t3, raj_inv, sum_ai2, sum_ai3, tsum; 
- real tchain; - real dadxi, dadxj; - real rad, min_rad; - - natoms = mdatoms->nr; - ni0 = 0; - ni1 = mdatoms->homenr; - - n = 0; - doffset = born->gb_doffset; - - aadata = *((gmx_allvsallgb2_data_t **)work); - - if (aadata == nullptr) - { - genborn_allvsall_setup(&aadata, top->idef.il, mdatoms->nr, - TRUE, TRUE, TRUE); - *((gmx_allvsallgb2_data_t **)work) = aadata; - } - - for (i = 0; i < born->nr; i++) - { - born->gpol_hct_work[i] = 0; - } - - for (i = ni0; i < ni1; i++) - { - /* We assume shifts are NOT used for all-vs-all interactions */ - - /* Load i atom data */ - ix = x[3*i]; - iy = x[3*i+1]; - iz = x[3*i+2]; - - rai = top->atomtypes.gb_radius[mdatoms->typeA[i]]-doffset; - rai_inv = 1.0/rai; - - sk_ai = born->param[i]; - sk2_ai = sk_ai*sk_ai; - - sum_ai = 0; - - /* Load limits for loop over neighbors */ - nj0 = aadata->jindex_gb[3*i]; - nj1 = aadata->jindex_gb[3*i+1]; - nj2 = aadata->jindex_gb[3*i+2]; - - mask = aadata->exclusion_mask_gb[i]; - - /* Prologue part, including exclusion mask */ - for (j = nj0; j < nj1; j++, mask++) - { - if (*mask != 0) - { - k = j%natoms; - - /* load j atom coordinates */ - jx = x[3*k]; - jy = x[3*k+1]; - jz = x[3*k+2]; - - /* Calculate distance */ - dx = ix - jx; - dy = iy - jy; - dz = iz - jz; - rsq = dx*dx+dy*dy+dz*dz; - - /* Calculate 1/r and 1/r2 */ - rinv = gmx::invsqrt(rsq); - dr = rsq*rinv; - - /* sk is precalculated in init_gb() */ - sk = born->param[k]; - raj = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset; - - /* aj -> ai interaction */ - - - if (rai < dr+sk) - { - lij = 1.0/(dr-sk); - dlij = 1.0; - - if (rai > dr-sk) - { - lij = rai_inv; - dlij = 0.0; - } - - uij = 1.0/(dr+sk); - lij2 = lij * lij; - lij3 = lij2 * lij; - uij2 = uij * uij; - uij3 = uij2 * uij; - - diff2 = uij2-lij2; - - lij_inv = gmx::invsqrt(lij2); - sk2 = sk*sk; - sk2_rinv = sk2*rinv; - prod = 0.25*sk2_rinv; - - log_term = std::log(uij*lij_inv); - /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */ - tmp = lij-uij + 
0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2); - - if (rai < sk-dr) - { - tmp = tmp + 2.0 * (rai_inv-lij); - } - - t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr); - t2 = -0.5*uij2 - prod*uij3 + 0.25*(uij*rinv+uij3*dr); - - t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv; - - dadxi = (dlij*t1+t2+t3)*rinv; - - sum_ai += 0.5*tmp; - } - else - { - dadxi = 0.0; - } - - /* ai -> aj interaction */ - if (raj < dr + sk_ai) - { - lij = 1.0/(dr-sk_ai); - dlij = 1.0; - raj_inv = 1.0/raj; - - if (raj > dr-sk_ai) - { - lij = raj_inv; - dlij = 0.0; - } - - lij2 = lij * lij; - lij3 = lij2 * lij; - - uij = 1.0/(dr+sk_ai); - uij2 = uij * uij; - uij3 = uij2 * uij; - - diff2 = uij2-lij2; - - lij_inv = gmx::invsqrt(lij2); - sk2 = sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */ - sk2_rinv = sk2*rinv; - prod = 0.25 * sk2_rinv; - - /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */ - log_term = std::log(uij*lij_inv); - - tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2); - - if (raj < sk_ai-dr) - { - tmp = tmp + 2.0 * (raj_inv-lij); - } - - t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr); - t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr); - t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv; - - dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */ - - born->gpol_hct_work[k] += 0.5*tmp; - } - else - { - dadxj = 0.0; - } - fr->dadx[n++] = dadxi; - fr->dadx[n++] = dadxj; - - } - } - - /* Main part, no exclusions */ - for (j = nj1; j < nj2; j++) - { - k = j%natoms; - - /* load j atom coordinates */ - jx = x[3*k]; - jy = x[3*k+1]; - jz = x[3*k+2]; - - /* Calculate distance */ - dx = ix - jx; - dy = iy - jy; - dz = iz - jz; - rsq = dx*dx+dy*dy+dz*dz; - - /* Calculate 1/r and 1/r2 */ - rinv = gmx::invsqrt(rsq); - dr = rsq*rinv; - - /* sk is precalculated in init_gb() */ - sk = born->param[k]; - raj = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset; - - /* aj -> ai interaction */ 
- if (rai < dr+sk) - { - lij = 1.0/(dr-sk); - dlij = 1.0; - - if (rai > dr-sk) - { - lij = rai_inv; - dlij = 0.0; - } - - uij = 1.0/(dr+sk); - lij2 = lij * lij; - lij3 = lij2 * lij; - uij2 = uij * uij; - uij3 = uij2 * uij; - - diff2 = uij2-lij2; - - lij_inv = gmx::invsqrt(lij2); - sk2 = sk*sk; - sk2_rinv = sk2*rinv; - prod = 0.25*sk2_rinv; - - log_term = std::log(uij*lij_inv); - /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */ - tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2); - - if (rai < sk-dr) - { - tmp = tmp + 2.0 * (rai_inv-lij); - } - - /* duij = 1.0; */ - t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr); - t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr); - t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv; - - dadxi = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */ - - sum_ai += 0.5*tmp; - } - else - { - dadxi = 0.0; - } - - /* ai -> aj interaction */ - if (raj < dr + sk_ai) - { - lij = 1.0/(dr-sk_ai); - dlij = 1.0; - raj_inv = 1.0/raj; - - if (raj > dr-sk_ai) - { - lij = raj_inv; - dlij = 0.0; - } - - lij2 = lij * lij; - lij3 = lij2 * lij; - - uij = 1.0/(dr+sk_ai); - uij2 = uij * uij; - uij3 = uij2 * uij; - - diff2 = uij2-lij2; - - lij_inv = gmx::invsqrt(lij2); - sk2 = sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */ - sk2_rinv = sk2*rinv; - prod = 0.25 * sk2_rinv; - - /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */ - log_term = std::log(uij*lij_inv); - - tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2); - - if (raj < sk_ai-dr) - { - tmp = tmp + 2.0 * (raj_inv-lij); - } - - t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr); - t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr); - t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv; - - dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */ - - born->gpol_hct_work[k] += 0.5*tmp; - } - else - { - dadxj = 0.0; - } - fr->dadx[n++] = dadxi; - 
fr->dadx[n++] = dadxj; - } - born->gpol_hct_work[i] += sum_ai; - } - - /* Parallel summations would go here if ever implemented with DD */ - - if (gb_algorithm == egbHCT) - { - /* HCT */ - for (i = 0; i < natoms; i++) - { - if (born->use[i] != 0) - { - rai = top->atomtypes.gb_radius[mdatoms->typeA[i]]-born->gb_doffset; - sum_ai = 1.0/rai - born->gpol_hct_work[i]; - min_rad = rai + born->gb_doffset; - rad = 1.0/sum_ai; - - born->bRad[i] = std::max(rad, min_rad); - fr->invsqrta[i] = gmx::invsqrt(born->bRad[i]); - } - } - - } - else - { - /* OBC */ - /* Calculate the radii */ - for (i = 0; i < natoms; i++) - { - if (born->use[i] != 0) - { - rai = top->atomtypes.gb_radius[mdatoms->typeA[i]]; - rai_inv2 = 1.0/rai; - rai = rai-doffset; - rai_inv = 1.0/rai; - sum_ai = rai * born->gpol_hct_work[i]; - sum_ai2 = sum_ai * sum_ai; - sum_ai3 = sum_ai2 * sum_ai; - - tsum = tanh(born->obc_alpha*sum_ai-born->obc_beta*sum_ai2+born->obc_gamma*sum_ai3); - born->bRad[i] = rai_inv - tsum*rai_inv2; - born->bRad[i] = 1.0 / born->bRad[i]; - - fr->invsqrta[i] = gmx::invsqrt(born->bRad[i]); - - tchain = rai * (born->obc_alpha-2*born->obc_beta*sum_ai+3*born->obc_gamma*sum_ai2); - born->drobc[i] = (1.0-tsum*tsum)*tchain*rai_inv2; - } - } - } - return 0; -} - - - - - -int -genborn_allvsall_calc_chainrule(t_forcerec * fr, - t_mdatoms * mdatoms, - gmx_genborn_t * born, - real * x, - real * f, - int gb_algorithm, - void * work) -{ - gmx_allvsallgb2_data_t *aadata; - int natoms; - int ni0, ni1; - int nj0, nj1, nj2; - int i, j, k, n; - int idx; - int * mask; - - real ix, iy, iz; - real fix, fiy, fiz; - real jx, jy, jz; - real dx, dy, dz; - real tx, ty, tz; - real rbai, rbaj, fgb, fgb_ai, rbi; - real * rb; - real * dadx; - - natoms = mdatoms->nr; - ni0 = 0; - ni1 = mdatoms->homenr; - dadx = fr->dadx; - - aadata = (gmx_allvsallgb2_data_t *)work; - - n = 0; - rb = born->work; - - /* Loop to get the proper form for the Born radius term */ - if (gb_algorithm == egbSTILL) - { - for (i = 0; i < natoms; 
i++) - { - rbi = born->bRad[i]; - rb[i] = (2 * rbi * rbi * fr->dvda[i])/ONE_4PI_EPS0; - } - } - else if (gb_algorithm == egbHCT) - { - for (i = 0; i < natoms; i++) - { - rbi = born->bRad[i]; - rb[i] = rbi * rbi * fr->dvda[i]; - } - } - else if (gb_algorithm == egbOBC) - { - for (idx = 0; idx < natoms; idx++) - { - rbi = born->bRad[idx]; - rb[idx] = rbi * rbi * born->drobc[idx] * fr->dvda[idx]; - } - } - - for (i = ni0; i < ni1; i++) - { - /* We assume shifts are NOT used for all-vs-all interactions */ - - /* Load i atom data */ - ix = x[3*i]; - iy = x[3*i+1]; - iz = x[3*i+2]; - - fix = 0; - fiy = 0; - fiz = 0; - - rbai = rb[i]; - - /* Load limits for loop over neighbors */ - nj0 = aadata->jindex_gb[3*i]; - nj1 = aadata->jindex_gb[3*i+1]; - nj2 = aadata->jindex_gb[3*i+2]; - - mask = aadata->exclusion_mask_gb[i]; - - /* Prologue part, including exclusion mask */ - for (j = nj0; j < nj1; j++, mask++) - { - if (*mask != 0) - { - k = j%natoms; - - /* load j atom coordinates */ - jx = x[3*k]; - jy = x[3*k+1]; - jz = x[3*k+2]; - - /* Calculate distance */ - dx = ix - jx; - dy = iy - jy; - dz = iz - jz; - - rbaj = rb[k]; - - fgb = rbai*dadx[n++]; - fgb_ai = rbaj*dadx[n++]; - - /* Total force between ai and aj is the sum of ai->aj and aj->ai */ - fgb = fgb + fgb_ai; - - tx = fgb * dx; - ty = fgb * dy; - tz = fgb * dz; - - fix = fix + tx; - fiy = fiy + ty; - fiz = fiz + tz; - - /* Update force on atom aj */ - f[3*k] = f[3*k] - tx; - f[3*k+1] = f[3*k+1] - ty; - f[3*k+2] = f[3*k+2] - tz; - } - } - - /* Main part, no exclusions */ - for (j = nj1; j < nj2; j++) - { - k = j%natoms; - - /* load j atom coordinates */ - jx = x[3*k]; - jy = x[3*k+1]; - jz = x[3*k+2]; - - /* Calculate distance */ - dx = ix - jx; - dy = iy - jy; - dz = iz - jz; - - rbaj = rb[k]; - - fgb = rbai*dadx[n++]; - fgb_ai = rbaj*dadx[n++]; - - /* Total force between ai and aj is the sum of ai->aj and aj->ai */ - fgb = fgb + fgb_ai; - - tx = fgb * dx; - ty = fgb * dy; - tz = fgb * dz; - - fix = fix + tx; - fiy = 
fiy + ty; - fiz = fiz + tz; - - /* Update force on atom aj */ - f[3*k] = f[3*k] - tx; - f[3*k+1] = f[3*k+1] - ty; - f[3*k+2] = f[3*k+2] - tz; - } - /* Update force and shift forces on atom ai */ - f[3*i] = f[3*i] + fix; - f[3*i+1] = f[3*i+1] + fiy; - f[3*i+2] = f[3*i+2] + fiz; - } - - return 0; -} diff --git a/src/gromacs/mdlib/genborn_allvsall.h b/src/gromacs/mdlib/genborn_allvsall.h deleted file mode 100644 index da0f3fa7eb..0000000000 --- a/src/gromacs/mdlib/genborn_allvsall.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 1991-2000, University of Groningen, The Netherlands. - * Copyright (c) 2001-2009, The GROMACS Development Team. - * Copyright (c) 2010,2014,2015, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. 
We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -#ifndef _GENBORN_ALLVSALL_H -#define _GENBORN_ALLVSALL_H - -#include "gromacs/utility/real.h" - -struct gmx_genborn_t; -struct gmx_localtop_t; -struct t_forcerec; -struct t_mdatoms; - -int -genborn_allvsall_calc_still_radii(struct t_forcerec * fr, - t_mdatoms * mdatoms, - gmx_genborn_t * born, - gmx_localtop_t * top, - real * x, - void * work); - -int -genborn_allvsall_calc_hct_obc_radii(struct t_forcerec * fr, - t_mdatoms * mdatoms, - gmx_genborn_t * born, - int gb_algorithm, - gmx_localtop_t * top, - real * x, - void * work); - -int -genborn_allvsall_calc_chainrule(struct t_forcerec * fr, - t_mdatoms * mdatoms, - gmx_genborn_t * born, - real * x, - real * f, - int gb_algorithm, - void * work); - -#endif diff --git a/src/gromacs/mdlib/mdebin.cpp b/src/gromacs/mdlib/mdebin.cpp index 49361183ff..3858ba527d 100644 --- a/src/gromacs/mdlib/mdebin.cpp +++ b/src/gromacs/mdlib/mdebin.cpp @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -241,18 +241,6 @@ t_mdebin *init_mdebin(ener_file_t fp_ene, { md->bEner[i] = TRUE; } - else if ((i == F_GBPOL) && ir->implicit_solvent == eisGBSA) - { - md->bEner[i] = TRUE; - } - else if ((i == F_NPSOLVATION) && ir->implicit_solvent == eisGBSA && (ir->sa_algorithm != esaNO)) - { - md->bEner[i] = TRUE; - } - else if ((i == F_GB12) || (i == F_GB13) || (i == F_GB14)) - { - md->bEner[i] = FALSE; - } else if ((i == F_ETOT) || (i == F_EKIN) || (i == F_TEMP)) { md->bEner[i] = EI_DYNAMICS(ir->eI); diff --git a/src/gromacs/mdlib/minimize.cpp b/src/gromacs/mdlib/minimize.cpp index a27575016d..e1814f97d8 100644 --- a/src/gromacs/mdlib/minimize.cpp +++ b/src/gromacs/mdlib/minimize.cpp @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -769,7 +769,7 @@ static void evaluate_energy(FILE *fplog, t_commrec *cr, count, nrnb, wcycle, top, &top_global->groups, ems->s.box, ems->s.x, &ems->s.hist, ems->f, force_vir, mdAtoms->mdatoms(), enerd, fcd, - ems->s.lambda, graph, fr, vsite, mu_tot, t, nullptr, TRUE, + ems->s.lambda, graph, fr, vsite, mu_tot, t, nullptr, GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY | (bNS ? 
GMX_FORCE_NS : 0), @@ -2832,7 +2832,6 @@ double do_nm(FILE *fplog, t_commrec *cr, const gmx::MDLogger &mdlog, size_t atom = atom_index[aid]; for (size_t d = 0; d < DIM; d++) { - gmx_bool bBornRadii = FALSE; gmx_int64_t step = 0; int force_flags = GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES; double t = 0; @@ -2861,7 +2860,7 @@ double do_nm(FILE *fplog, t_commrec *cr, const gmx::MDLogger &mdlog, constr, enerd, fcd, &state_work.s, &state_work.f, vir, mdatoms, nrnb, wcycle, graph, &top_global->groups, - shellfc, fr, bBornRadii, t, mu_tot, + shellfc, fr, t, mu_tot, vsite, DdOpenBalanceRegionBeforeForceComputation::no, DdCloseBalanceRegionAfterForceComputation::no); diff --git a/src/gromacs/mdlib/shellfc.cpp b/src/gromacs/mdlib/shellfc.cpp index f953a5a6aa..83b7273f52 100644 --- a/src/gromacs/mdlib/shellfc.cpp +++ b/src/gromacs/mdlib/shellfc.cpp @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2008, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -987,7 +987,6 @@ void relax_shell_flexcon(FILE *fplog, t_commrec *cr, gmx_bool bVerbose, gmx_groups_t *groups, gmx_shellfc_t *shfc, t_forcerec *fr, - gmx_bool bBornRadii, double t, rvec mu_tot, gmx_vsite_t *vsite, DdOpenBalanceRegionBeforeForceComputation ddOpenBalanceRegion, @@ -1121,7 +1120,7 @@ void relax_shell_flexcon(FILE *fplog, t_commrec *cr, gmx_bool bVerbose, state->box, state->x, &state->hist, force[Min], force_vir, md, enerd, fcd, state->lambda, graph, - fr, vsite, mu_tot, t, nullptr, bBornRadii, + fr, vsite, mu_tot, t, nullptr, (bDoNS ? 
GMX_FORCE_NS : 0) | force_flags, ddOpenBalanceRegion, ddCloseBalanceRegion); @@ -1224,7 +1223,7 @@ void relax_shell_flexcon(FILE *fplog, t_commrec *cr, gmx_bool bVerbose, top, groups, state->box, pos[Try], &state->hist, force[Try], force_vir, md, enerd, fcd, state->lambda, graph, - fr, vsite, mu_tot, t, nullptr, bBornRadii, + fr, vsite, mu_tot, t, nullptr, force_flags, ddOpenBalanceRegion, ddCloseBalanceRegion); diff --git a/src/gromacs/mdlib/shellfc.h b/src/gromacs/mdlib/shellfc.h index c092a1f37a..73831831b7 100644 --- a/src/gromacs/mdlib/shellfc.h +++ b/src/gromacs/mdlib/shellfc.h @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2008, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -81,7 +81,6 @@ void relax_shell_flexcon(FILE *log, t_commrec *cr, gmx_bool bVerbose, gmx_groups_t *groups, gmx_shellfc_t *shfc, t_forcerec *fr, - gmx_bool bBornRadii, double t, rvec mu_tot, gmx_vsite_t *vsite, DdOpenBalanceRegionBeforeForceComputation ddOpenBalanceRegion, diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp index c2c40c4414..b38c3a8a3e 100644 --- a/src/gromacs/mdlib/sim_util.cpp +++ b/src/gromacs/mdlib/sim_util.cpp @@ -73,7 +73,6 @@ #include "gromacs/mdlib/constr.h" #include "gromacs/mdlib/force.h" #include "gromacs/mdlib/forcerec.h" -#include "gromacs/mdlib/genborn.h" #include "gromacs/mdlib/gmx_omp_nthreads.h" #include "gromacs/mdlib/mdrun.h" #include "gromacs/mdlib/nb_verlet.h" @@ -1053,7 +1052,6 @@ static void do_force_cutsVERLET(FILE *fplog, t_commrec *cr, t_forcerec *fr, interaction_const_t *ic, gmx_vsite_t *vsite, rvec mu_tot, double t, gmx_edsam_t ed, - gmx_bool bBornRadii, int flags, DdOpenBalanceRegionBeforeForceComputation ddOpenBalanceRegion, DdCloseBalanceRegionAfterForceComputation ddCloseBalanceRegion) @@ -1538,9 +1536,8 @@ static void do_force_cutsVERLET(FILE *fplog, t_commrec *cr, /* Compute the bonded and non-bonded energies and optionally forces */ do_force_lowlevel(fr, inputrec, &(top->idef), cr, nrnb, wcycle, mdatoms, - as_rvec_array(x.data()), hist, f, &forceWithVirial, enerd, fcd, top, fr->born, - bBornRadii, box, - inputrec->fepvals, lambda, graph, &(top->excls), fr->mu_tot, + as_rvec_array(x.data()), hist, f, &forceWithVirial, enerd, fcd, + box, inputrec->fepvals, lambda, graph, &(top->excls), fr->mu_tot, flags, &cycles_pme); wallcycle_stop(wcycle, ewcFORCE); @@ -1762,7 +1759,6 @@ static void do_force_cutsGROUP(FILE *fplog, t_commrec *cr, real *lambda, t_graph *graph, t_forcerec *fr, gmx_vsite_t *vsite, rvec mu_tot, double t, gmx_edsam_t ed, - gmx_bool bBornRadii, int flags, DdOpenBalanceRegionBeforeForceComputation ddOpenBalanceRegion, DdCloseBalanceRegionAfterForceComputation 
ddCloseBalanceRegion) @@ -1930,12 +1926,6 @@ static void do_force_cutsGROUP(FILE *fplog, t_commrec *cr, wallcycle_stop(wcycle, ewcNS); } - if (inputrec->implicit_solvent && bNS) - { - make_gb_nblist(cr, inputrec->gb_algorithm, - as_rvec_array(x.data()), box, fr, &top->idef, graph, fr->born); - } - if (DOMAINDECOMP(cr) && !thisRankHasDuty(cr, DUTY_PME)) { wallcycle_start(wcycle, ewcPPDURINGPME); @@ -1992,9 +1982,8 @@ static void do_force_cutsGROUP(FILE *fplog, t_commrec *cr, /* Compute the bonded and non-bonded energies and optionally forces */ do_force_lowlevel(fr, inputrec, &(top->idef), cr, nrnb, wcycle, mdatoms, - as_rvec_array(x.data()), hist, f, &forceWithVirial, enerd, fcd, top, fr->born, - bBornRadii, box, - inputrec->fepvals, lambda, + as_rvec_array(x.data()), hist, f, &forceWithVirial, enerd, fcd, + box, inputrec->fepvals, lambda, graph, &(top->excls), fr->mu_tot, flags, &cycles_pme); @@ -2100,7 +2089,6 @@ void do_force(FILE *fplog, t_commrec *cr, t_forcerec *fr, gmx_vsite_t *vsite, rvec mu_tot, double t, gmx_edsam_t ed, - gmx_bool bBornRadii, int flags, DdOpenBalanceRegionBeforeForceComputation ddOpenBalanceRegion, DdCloseBalanceRegionAfterForceComputation ddCloseBalanceRegion) @@ -2129,7 +2117,6 @@ void do_force(FILE *fplog, t_commrec *cr, fr, fr->ic, vsite, mu_tot, t, ed, - bBornRadii, flags, ddOpenBalanceRegion, ddCloseBalanceRegion); @@ -2146,7 +2133,6 @@ void do_force(FILE *fplog, t_commrec *cr, lambda.data(), graph, fr, vsite, mu_tot, t, ed, - bBornRadii, flags, ddOpenBalanceRegion, ddCloseBalanceRegion); diff --git a/src/gromacs/mdlib/tpi.cpp b/src/gromacs/mdlib/tpi.cpp index 6588f1522d..b284487a1f 100644 --- a/src/gromacs/mdlib/tpi.cpp +++ b/src/gromacs/mdlib/tpi.cpp @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. 
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -664,7 +664,7 @@ double do_tpi(FILE *fplog, t_commrec *cr, const gmx::MDLogger gmx_unused &mdlog, state_global->box, state_global->x, &state_global->hist, f, force_vir, mdatoms, enerd, fcd, state_global->lambda, - nullptr, fr, nullptr, mu_tot, t, nullptr, FALSE, + nullptr, fr, nullptr, mu_tot, t, nullptr, GMX_FORCE_NONBONDED | GMX_FORCE_ENERGY | (bNS ? GMX_FORCE_DYNAMICBOX | GMX_FORCE_NS : 0) | (bStateChanged ? GMX_FORCE_STATECHANGED : 0), diff --git a/src/gromacs/mdtypes/forcerec.h b/src/gromacs/mdtypes/forcerec.h index ce3b8b341f..e4e41decf9 100644 --- a/src/gromacs/mdtypes/forcerec.h +++ b/src/gromacs/mdtypes/forcerec.h @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -50,7 +50,6 @@ struct ForceProviders; /* Abstract type for PME that is defined only in the routine that use them. 
*/ -struct gmx_genborn_t; struct gmx_ns_t; struct gmx_pme_t; struct nonbonded_verlet_t; @@ -110,14 +109,10 @@ extern "C" { enum { enbvdwNONE, enbvdwLJ, enbvdwBHAM, enbvdwTAB, enbvdwNR }; -/* OOR is "one over r" -- standard coul */ -enum { - enbcoulNONE, enbcoulOOR, enbcoulRF, enbcoulTAB, enbcoulGB, enbcoulFEWALD, enbcoulNR -}; enum { egCOULSR, egLJSR, egBHAMSR, - egCOUL14, egLJ14, egGB, egNR + egCOUL14, egLJ14, egNR }; extern const char *egrp_nm[egNR+1]; @@ -192,7 +187,6 @@ struct t_forcerec { gmx_bool bAllvsAll; /* Private work data */ void *AllvsAll_work; - void *AllvsAll_workgb; /* Cut-Off stuff. * Infinite cut-off's will be GMX_CUTOFF_INF (unlike in t_inputrec: 0). @@ -320,46 +314,6 @@ struct t_forcerec { /* Shell molecular dynamics flexible constraints */ real fc_stepsize; - /* Generalized born implicit solvent */ - gmx_bool bGB; - /* Generalized born stuff */ - real gb_epsilon_solvent; - /* Table data for GB */ - struct t_forcetable *gbtab; - /* VdW radius for each atomtype (dim is thus ntype) */ - real *atype_radius; - /* Effective radius (derived from effective volume) for each type */ - real *atype_vol; - /* Implicit solvent - surface tension for each atomtype */ - real *atype_surftens; - /* Implicit solvent - radius for GB calculation */ - real *atype_gb_radius; - /* Implicit solvent - overlap for HCT model */ - real *atype_S_hct; - /* Generalized born interaction data */ - struct gmx_genborn_t *born; - - /* Table scale for GB */ - real gbtabscale; - /* Table range for GB */ - real gbtabr; - /* GB neighborlists (the sr list will contain for each atom all other atoms - * (for use in the SA calculation) and the lr list will contain - * for each atom all atoms 1-4 or greater (for use in the GB calculation) - */ - struct t_nblist *gblist_sr; - struct t_nblist *gblist_lr; - struct t_nblist *gblist; - - /* Inverse square root of the Born radii for implicit solvent */ - real *invsqrta; - /* Derivatives of the potential with respect to the Born radii */ - real 
*dvda; - /* Derivatives of the Born radii with respect to coordinates */ - real *dadx; - real *dadx_rawptr; - int nalloc_dadx; /* Allocated size of dadx */ - /* If > 0 signals Test Particle Insertion, * the value is the number of atoms of the molecule to insert * Only the energy difference due to the addition of the last molecule diff --git a/src/gromacs/mdtypes/inputrec.cpp b/src/gromacs/mdtypes/inputrec.cpp index 665e6563c4..e42cfe2385 100644 --- a/src/gromacs/mdtypes/inputrec.cpp +++ b/src/gromacs/mdtypes/inputrec.cpp @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2010, The GROMACS development team. - * Copyright (c) 2012,2014,2015,2016,2017, by the GROMACS development team, led by + * Copyright (c) 2012,2014,2015,2016,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -927,22 +927,6 @@ void pr_inputrec(FILE *fp, int indent, const char *title, const t_inputrec *ir, PR("ewald-geometry", ir->ewald_geometry); PR("epsilon-surface", ir->epsilon_surface); - /* Implicit solvent */ - PS("implicit-solvent", EIMPLICITSOL(ir->implicit_solvent)); - - /* Generalized born electrostatics */ - PS("gb-algorithm", EGBALGORITHM(ir->gb_algorithm)); - PI("nstgbradii", ir->nstgbradii); - PR("rgbradii", ir->rgbradii); - PR("gb-epsilon-solvent", ir->gb_epsilon_solvent); - PR("gb-saltconc", ir->gb_saltconc); - PR("gb-obc-alpha", ir->gb_obc_alpha); - PR("gb-obc-beta", ir->gb_obc_beta); - PR("gb-obc-gamma", ir->gb_obc_gamma); - PR("gb-dielectric-offset", ir->gb_dielectric_offset); - PS("sa-algorithm", ESAALGORITHM(ir->sa_algorithm)); - PR("sa-surface-tension", ir->sa_surface_tension); - /* Options for weak coupling algorithms */ PS("tcoupl", ETCOUPLTYPE(ir->etc)); PI("nsttcouple", ir->nsttcouple); @@ -1343,18 +1327,6 @@ void cmp_inputrec(FILE *fp, const t_inputrec *ir1, const t_inputrec *ir2, real f cmp_real(fp, "inputrec->epsilon_r", -1, ir1->epsilon_r, ir2->epsilon_r, ftol, abstol); cmp_real(fp, "inputrec->epsilon_rf", -1, ir1->epsilon_rf, ir2->epsilon_rf, ftol, abstol); cmp_real(fp, "inputrec->tabext", -1, ir1->tabext, ir2->tabext, ftol, abstol); - cmp_int(fp, "inputrec->implicit_solvent", -1, ir1->implicit_solvent, ir2->implicit_solvent); - cmp_int(fp, "inputrec->gb_algorithm", -1, ir1->gb_algorithm, ir2->gb_algorithm); - cmp_int(fp, "inputrec->nstgbradii", -1, ir1->nstgbradii, ir2->nstgbradii); - cmp_real(fp, "inputrec->rgbradii", -1, ir1->rgbradii, ir2->rgbradii, ftol, abstol); - cmp_real(fp, "inputrec->gb_saltconc", -1, ir1->gb_saltconc, ir2->gb_saltconc, ftol, abstol); - cmp_real(fp, "inputrec->gb_epsilon_solvent", -1, ir1->gb_epsilon_solvent, ir2->gb_epsilon_solvent, ftol, abstol); - cmp_real(fp, "inputrec->gb_obc_alpha", -1, ir1->gb_obc_alpha, ir2->gb_obc_alpha, ftol, abstol); - cmp_real(fp, "inputrec->gb_obc_beta", -1, ir1->gb_obc_beta, 
ir2->gb_obc_beta, ftol, abstol); - cmp_real(fp, "inputrec->gb_obc_gamma", -1, ir1->gb_obc_gamma, ir2->gb_obc_gamma, ftol, abstol); - cmp_real(fp, "inputrec->gb_dielectric_offset", -1, ir1->gb_dielectric_offset, ir2->gb_dielectric_offset, ftol, abstol); - cmp_int(fp, "inputrec->sa_algorithm", -1, ir1->sa_algorithm, ir2->sa_algorithm); - cmp_real(fp, "inputrec->sa_surface_tension", -1, ir1->sa_surface_tension, ir2->sa_surface_tension, ftol, abstol); cmp_int(fp, "inputrec->eDispCorr", -1, ir1->eDispCorr, ir2->eDispCorr); cmp_real(fp, "inputrec->shake_tol", -1, ir1->shake_tol, ir2->shake_tol, ftol, abstol); @@ -1466,8 +1438,7 @@ gmx_bool inputrecNeedMutot(const t_inputrec *ir) gmx_bool inputrecExclForces(const t_inputrec *ir) { - return (EEL_FULL(ir->coulombtype) || (EEL_RF(ir->coulombtype)) || - ir->implicit_solvent != eisNO); + return (EEL_FULL(ir->coulombtype) || (EEL_RF(ir->coulombtype))); } gmx_bool inputrecNptTrotter(const t_inputrec *ir) diff --git a/src/gromacs/mdtypes/inputrec.h b/src/gromacs/mdtypes/inputrec.h index 4094bc02fd..87a715980e 100644 --- a/src/gromacs/mdtypes/inputrec.h +++ b/src/gromacs/mdtypes/inputrec.h @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -297,18 +297,7 @@ struct t_inputrec real rcoulomb; /* Coulomb cutoff (nm) */ real epsilon_r; /* relative dielectric constant */ real epsilon_rf; /* relative dielectric constant of the RF */ - int implicit_solvent; /* No (=explicit water), or GBSA solvent models */ - int gb_algorithm; /* Algorithm to use for calculation Born radii */ - int nstgbradii; /* Frequency of updating Generalized Born radii */ - real rgbradii; /* Cutoff for GB radii calculation */ - real gb_saltconc; /* Salt concentration (M) for GBSA models */ - real gb_epsilon_solvent; /* dielectric coeff. of implicit solvent */ - real gb_obc_alpha; /* 1st scaling factor for Bashford-Case GB */ - real gb_obc_beta; /* 2nd scaling factor for Bashford-Case GB */ - real gb_obc_gamma; /* 3rd scaling factor for Bashford-Case GB */ - real gb_dielectric_offset; /* Dielectric offset for Still/HCT/OBC */ - int sa_algorithm; /* Algorithm for SA part of GBSA */ - real sa_surface_tension; /* Energy factor for SA part of GBSA */ + bool implicit_solvent; /* Always false (no longer supported */ int vdwtype; /* Type of Van der Waals treatment */ int vdw_modifier; /* Modify the VdW interaction */ real rvdw_switch; /* Van der Waals switch range start (nm) */ diff --git a/src/gromacs/mdtypes/md_enums.cpp b/src/gromacs/mdtypes/md_enums.cpp index e47f1e0a0f..de4e0c362d 100644 --- a/src/gromacs/mdtypes/md_enums.cpp +++ b/src/gromacs/mdtypes/md_enums.cpp @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -79,7 +79,7 @@ const char *erefscaling_names[erscNR+1] = { const char *eel_names[eelNR+1] = { "Cut-off", "Reaction-Field", "Generalized-Reaction-Field", "PME", "Ewald", "P3M-AD", "Poisson", "Switch", "Shift", "User", - "Generalized-Born", "Reaction-Field-nec", "Encad-shift", + "Generalized-Born (unused)", "Reaction-Field-nec", "Encad-shift", "PME-User", "PME-Switch", "PME-User-Switch", "Reaction-Field-zero", nullptr }; @@ -189,18 +189,6 @@ const char *eann_names[eannNR+1] = { "No", "Single", "Periodic", nullptr }; -const char *eis_names[eisNR+1] = { - "No", "GBSA", nullptr -}; - -const char *egb_names[egbNR+1] = { - "Still", "HCT", "OBC", nullptr -}; - -const char *esa_names[esaNR+1] = { - "Ace-approximation", "None", "Still", nullptr -}; - const char *ewt_names[ewtNR+1] = { "9-3", "10-4", "table", "12-6", nullptr }; @@ -257,7 +245,7 @@ const char *gmx_nblist_interaction_names[GMX_NBLIST_INTERACTION_NR+1] = { const char *gmx_nbkernel_elec_names[GMX_NBKERNEL_ELEC_NR+1] = { - "None", "Coulomb", "Reaction-Field", "Cubic-Spline-Table", "Generalized-Born", "Ewald", nullptr + "None", "Coulomb", "Reaction-Field", "Cubic-Spline-Table", "Ewald", nullptr }; const char *gmx_nbkernel_vdw_names[GMX_NBKERNEL_VDW_NR+1] = diff --git a/src/gromacs/mdtypes/md_enums.h b/src/gromacs/mdtypes/md_enums.h index a0c1565499..af55870361 100644 --- a/src/gromacs/mdtypes/md_enums.h +++ b/src/gromacs/mdtypes/md_enums.h @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -159,12 +159,7 @@ extern const char *eintmod_names[eintmodNR+1]; //! 
Macro to select the correct string for modifiers #define INTMODIFIER(e) enum_name(e, eintmodNR, eintmod_names) -/*! \brief Cut-off treatment for Coulomb - * - * eelNOTUSED1 used to be GB, but to enable generalized born with different - * forms of electrostatics (RF, switch, etc.) in the future it is now selected - * separately (through the implicit_solvent option). - */ +/*! \brief Cut-off treatment for Coulomb */ enum { eelCUT, eelRF, eelGRF, eelPME, eelEWALD, eelP3M_AD, eelPOISSON, eelSWITCH, eelSHIFT, eelUSER, eelGB_NOTUSED, eelRF_NEC_UNSUPPORTED, eelENCADSHIFT, @@ -469,33 +464,6 @@ extern const char *eann_names[eannNR+1]; //! And macro for simulated annealing string #define EANNEAL(e) enum_name(e, eannNR, eann_names) -//! Implicit solvent algorithms. -enum { - eisNO, eisGBSA, eisNR -}; -//! String corresponding to implicit solvent. -extern const char *eis_names[eisNR+1]; -//! Macro for implicit solvent string. -#define EIMPLICITSOL(e) enum_name(e, eisNR, eis_names) - -//! Algorithms for calculating GB radii. -enum { - egbSTILL, egbHCT, egbOBC, egbNR -}; -//! String for GB algorithm name. -extern const char *egb_names[egbNR+1]; -//! Macro for GB string. -#define EGBALGORITHM(e) enum_name(e, egbNR, egb_names) - -//! Surface area algorithm for implicit solvent. -enum { - esaAPPROX, esaNO, esaSTILL, esaNR -}; -//! String corresponding to surface area algorithm. -extern const char *esa_names[esaNR+1]; -//! brief Macro for SA algorithm string. -#define ESAALGORITHM(e) enum_name(e, esaNR, esa_names) - //! Wall types. 
enum { ewt93, ewt104, ewtTABLE, ewt126, ewtNR @@ -636,7 +604,6 @@ enum gmx_nbkernel_elec GMX_NBKERNEL_ELEC_COULOMB, GMX_NBKERNEL_ELEC_REACTIONFIELD, GMX_NBKERNEL_ELEC_CUBICSPLINETABLE, - GMX_NBKERNEL_ELEC_GENERALIZEDBORN, GMX_NBKERNEL_ELEC_EWALD, GMX_NBKERNEL_ELEC_NR }; diff --git a/src/gromacs/tables/forcetable.cpp b/src/gromacs/tables/forcetable.cpp index cfd20340b9..6b2a87692f 100644 --- a/src/gromacs/tables/forcetable.cpp +++ b/src/gromacs/tables/forcetable.cpp @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -1446,74 +1446,6 @@ t_forcetable *make_tables(FILE *out, return table; } -t_forcetable *make_gb_table(const t_forcerec *fr) -{ - t_tabledata *td; - int nx0; - double r, r2, Vtab, Ftab, expterm; - - t_forcetable *table; - - /* Set the table dimensions for GB, not really necessary to - * use etiNR (since we only have one table, but ...) - */ - snew(table, 1); - snew(td, 1); - table->interaction = GMX_TABLE_INTERACTION_ELEC; - table->format = GMX_TABLE_FORMAT_CUBICSPLINE_YFGH; - table->r = fr->gbtabr; - table->scale = fr->gbtabscale; - table->n = static_cast(table->scale*table->r); - table->formatsize = 4; - table->ninteractions = 1; - table->stride = table->formatsize*table->ninteractions; - nx0 = 0; - - /* Each table type (e.g. coul,lj6,lj12) requires four numbers per - * datapoint. For performance reasons we want the table data to be - * aligned on a 32-byte boundary. 
This new pointer must not be - * used in a free() call, but thankfully we're sloppy enough not - * to do this :-) - */ - - snew_aligned(table->data, table->stride*table->n, 32); - - init_table(table->n, nx0, table->scale, &(td[0]), TRUE); - - /* Local implementation so we don't have to use the etabGB - * enum above, which will cause problems later when - * making the other tables (right now even though we are using - * GB, the normal Coulomb tables will be created, but this - * will cause a problem since fr->eeltype==etabGB which will not - * be defined in fill_table and set_table_type - */ - - for (int i = nx0; i < table->n; i++) - { - r = td->x[i]; - r2 = r*r; - expterm = exp(-0.25*r2); - - Vtab = 1/sqrt(r2+expterm); - Ftab = (r-0.25*r*expterm)/((r2+expterm)*sqrt(r2+expterm)); - - /* Convert to single precision when we store to mem */ - td->x[i] = i/table->scale; - td->v[i] = Vtab; - td->f[i] = Ftab; - - } - - copy2table(table->n, 0, table->stride, td[0].x, td[0].v, td[0].f, 1.0, table->data); - - done_tabledata(&(td[0])); - sfree(td); - - return table; - - -} - bondedtable_t make_bonded_table(FILE *fplog, const char *fn, int angle) { t_tabledata td; diff --git a/src/gromacs/tables/forcetable.h b/src/gromacs/tables/forcetable.h index 7536f48d1b..73f765a3d6 100644 --- a/src/gromacs/tables/forcetable.h +++ b/src/gromacs/tables/forcetable.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2014,2015,2016,2017, by the GROMACS development team, led by + * Copyright (c) 2012,2014,2015,2016,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -143,13 +143,6 @@ t_forcetable *make_tables(FILE *fp, */ bondedtable_t make_bonded_table(FILE *fplog, const char *fn, int angle); -/*! 
\brief Return a table for GB calculations - * - * \param fr Force record - * \return Pointer to new gb table structure - */ -t_forcetable *make_gb_table(const t_forcerec *fr); - /*! \brief Construct and return tabulated dispersion and repulsion interactions * * This table can be used to compute long-range dispersion corrections */ diff --git a/src/gromacs/timing/wallcycle.cpp b/src/gromacs/timing/wallcycle.cpp index 6195dadd27..86f5a7d8bc 100644 --- a/src/gromacs/timing/wallcycle.cpp +++ b/src/gromacs/timing/wallcycle.cpp @@ -104,7 +104,7 @@ static const char *wcn[ewcNR] = { "Run", "Step", "PP during PME", "Domain decomp.", "DD comm. load", "DD comm. bounds", "Vsite constr.", "Send X to PME", "Neighbor search", "Launch GPU ops.", - "Comm. coord.", "Born radii", "Force", "Wait + Comm. F", "PME mesh", + "Comm. coord.", "Force", "Wait + Comm. F", "PME mesh", "PME redist. X/F", "PME spread", "PME gather", "PME 3D-FFT", "PME 3D-FFT Comm.", "PME solve LJ", "PME solve Elec", "PME wait for PP", "Wait + Recv. PME F", "Wait PME GPU spread", "PME 3D-FFT", "PME solve", /* the strings for FFT/solve are repeated here for mixed mode counters */ diff --git a/src/gromacs/timing/wallcycle.h b/src/gromacs/timing/wallcycle.h index 9698467f7e..6896f87a95 100644 --- a/src/gromacs/timing/wallcycle.h +++ b/src/gromacs/timing/wallcycle.h @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2008, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2017, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -50,7 +50,7 @@ struct t_commrec; enum { ewcRUN, ewcSTEP, ewcPPDURINGPME, ewcDOMDEC, ewcDDCOMMLOAD, ewcDDCOMMBOUND, ewcVSITECONSTR, ewcPP_PMESENDX, ewcNS, ewcLAUNCH_GPU, - ewcMOVEX, ewcGB, ewcFORCE, ewcMOVEF, ewcPMEMESH, + ewcMOVEX, ewcFORCE, ewcMOVEF, ewcPMEMESH, ewcPME_REDISTXF, ewcPME_SPREAD, ewcPME_GATHER, ewcPME_FFT, ewcPME_FFTCOMM, ewcLJPME, ewcPME_SOLVE, ewcPMEWAITCOMM, ewcPP_PMEWAITRECVF, ewcWAIT_GPU_PME_SPREAD, ewcPME_FFT_MIXED_MODE, ewcPME_SOLVE_MIXED_MODE, diff --git a/src/gromacs/topology/atoms.cpp b/src/gromacs/topology/atoms.cpp index 0fa17bcd36..61449e4859 100644 --- a/src/gromacs/topology/atoms.cpp +++ b/src/gromacs/topology/atoms.cpp @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -74,11 +74,7 @@ void init_atom(t_atoms *at) void init_atomtypes(t_atomtypes *at) { at->nr = 0; - at->radius = nullptr; - at->vol = nullptr; at->atomnumber = nullptr; - at->gb_radius = nullptr; - at->S_hct = nullptr; } void done_atom(t_atoms *at) @@ -95,12 +91,7 @@ void done_atom(t_atoms *at) void done_atomtypes(t_atomtypes *atype) { atype->nr = 0; - sfree(atype->radius); - sfree(atype->vol); - sfree(atype->surftens); sfree(atype->atomnumber); - sfree(atype->gb_radius); - sfree(atype->S_hct); } void add_t_atoms(t_atoms *atoms, int natom_extra, int nres_extra) @@ -332,10 +323,8 @@ void pr_atomtypes(FILE *fp, int indent, const char *title, const t_atomtypes *at { pr_indent(fp, indent); fprintf(fp, - "atomtype[%3d]={radius=%12.5e, volume=%12.5e, gb_radius=%12.5e, surftens=%12.5e, atomnumber=%4d, S_hct=%12.5e)}\n", - bShowNumbers ? i : -1, atomtypes->radius[i], atomtypes->vol[i], - atomtypes->gb_radius[i], - atomtypes->surftens[i], atomtypes->atomnumber[i], atomtypes->S_hct[i]); + "atomtype[%3d]={atomnumber=%4d}\n", + bShowNumbers ? i : -1, atomtypes->atomnumber[i]); } } } diff --git a/src/gromacs/topology/atoms.h b/src/gromacs/topology/atoms.h index 8bb674b67d..59bd270a9f 100644 --- a/src/gromacs/topology/atoms.h +++ b/src/gromacs/topology/atoms.h @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2012,2014,2015,2016, by the GROMACS development team, led by + * Copyright (c) 2012,2014,2015,2016,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -131,11 +131,6 @@ typedef struct t_atoms typedef struct t_atomtypes { int nr; /* number of atomtypes */ - real *radius; /* GBSA radius for each atomtype */ - real *vol; /* GBSA efective volume for each atomtype */ - real *surftens; /* implicit solvent surftens for each atomtype */ - real *gb_radius; /* GB radius for each atom type */ - real *S_hct; /* Overlap factors for HCT/OBC GB models */ int *atomnumber; /* Atomic number, used for QM/MM */ } t_atomtypes; diff --git a/src/gromacs/topology/idef.cpp b/src/gromacs/topology/idef.cpp index 4806c2a9ba..652a36bb5d 100644 --- a/src/gromacs/topology/idef.cpp +++ b/src/gromacs/topology/idef.cpp @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2016,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -273,10 +273,14 @@ void pr_iparams(FILE *fp, t_functype ftype, const t_iparams *iparams) case F_VSITEN: fprintf(fp, "n=%2d, a=%15.8e\n", iparams->vsiten.n, iparams->vsiten.a); break; - case F_GB12: - case F_GB13: - case F_GB14: - fprintf(fp, "sar=%15.8e, st=%15.8e, pi=%15.8e, gbr=%15.8e, bmlt=%15.8e\n", iparams->gb.sar, iparams->gb.st, iparams->gb.pi, iparams->gb.gbr, iparams->gb.bmlt); + case F_GB12_NOLONGERUSED: + case F_GB13_NOLONGERUSED: + case F_GB14_NOLONGERUSED: + // These could only be generated by grompp, not written in + // a .top file. Now that implicit solvent is not + // supported, they can't be generated, and the values are + // ignored if read from an old .tpr file. So there is + // nothing to print. 
break; case F_CMAP: fprintf(fp, "cmapA=%1d, cmapB=%1d\n", iparams->cmap.cmapA, iparams->cmap.cmapB); diff --git a/src/gromacs/topology/idef.h b/src/gromacs/topology/idef.h index 467d7d45f8..822bbf25fb 100644 --- a/src/gromacs/topology/idef.h +++ b/src/gromacs/topology/idef.h @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2016,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -87,11 +87,11 @@ enum { F_PIDIHS, F_TABDIHS, F_CMAP, - F_GB12, - F_GB13, - F_GB14, - F_GBPOL, - F_NPSOLVATION, + F_GB12_NOLONGERUSED, + F_GB13_NOLONGERUSED, + F_GB14_NOLONGERUSED, + F_GBPOL_NOLONGERUSED, + F_NPSOLVATION_NOLONGERUSED, F_LJ14, F_COUL14, F_LJC14_Q, @@ -268,9 +268,6 @@ typedef union t_iparams struct { int table; real kA; real kB; } tab; - struct { - real sar, st, pi, gbr, bmlt; - } gb; struct { int cmapA, cmapB; } cmap; diff --git a/src/gromacs/topology/ifunc.cpp b/src/gromacs/topology/ifunc.cpp index 376e46402c..f042b5ceaf 100644 --- a/src/gromacs/topology/ifunc.cpp +++ b/src/gromacs/topology/ifunc.cpp @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2017, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -113,11 +113,11 @@ const t_interaction_function interaction_function[F_NRE] = def_bonded ("PIDIHS", "Improper Dih.", 4, 3, 3, eNR_IMPROPER, pdihs ), def_bondedt ("TABDIHS", "Tab. Dih.", 4, 2, 2, eNR_TABDIHS, tab_dihs ), def_bonded ("CMAP", "CMAP Dih.", 5, -1, -1, eNR_CMAP, unimplemented ), - def_bonded ("GB12", "GB 1-2 Pol.", 2, 4, 0, eNR_GB, unimplemented ), - def_bonded ("GB13", "GB 1-3 Pol.", 2, 4, 0, eNR_GB, unimplemented ), - def_bonded ("GB14", "GB 1-4 Pol.", 2, 4, 0, eNR_GB, unimplemented ), - def_nofc ("GBPOL", "GB Polarization" ), - def_nofc ("NPSOLVATION", "Nonpolar Sol." ), + def_nofc ("GB12", "GB 1-2 Pol. (unused)" ), + def_nofc ("GB13", "GB 1-3 Pol. (unused)" ), + def_nofc ("GB14", "GB 1-4 Pol. (unused)" ), + def_nofc ("GBPOL", "GB Polarization (unused)" ), + def_nofc ("NPSOLVATION", "Nonpolar Sol. (unused)" ), def_bondedz ("LJ14", "LJ-14", 2, 2, 2, eNR_NB14, unimplemented ), def_nofc ("COUL14", "Coulomb-14" ), def_bondedz ("LJC14_Q", "LJC-14 q", 2, 5, 0, eNR_NB14, unimplemented ), diff --git a/src/programs/mdrun/md.cpp b/src/programs/mdrun/md.cpp index 23462bebfc..dae46eb389 100644 --- a/src/programs/mdrun/md.cpp +++ b/src/programs/mdrun/md.cpp @@ -321,7 +321,7 @@ double gmx::do_md(FILE *fplog, t_commrec *cr, const gmx::MDLogger &mdlog, gmx_bool bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner; gmx_bool bNS, bNStList, bSimAnn, bStopCM, bFirstStep, bInitStep, bLastStep = FALSE, - bBornRadii, bUsingEnsembleRestraints; + bUsingEnsembleRestraints; gmx_bool bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE; gmx_bool do_ene, do_log, do_verbose, bRerunWarnNoV = TRUE, bForceUpdate = FALSE, bCPT; @@ -1060,13 +1060,6 @@ double gmx::do_md(FILE *fplog, t_commrec *cr, const gmx::MDLogger &mdlog, bLastStep = TRUE; } - /* Determine whether or not to update the Born radii if doing GB */ - bBornRadii = bFirstStep; - if (ir->implicit_solvent && (step % ir->nstgbradii == 0)) - { - bBornRadii = TRUE; - } - /* do_log triggers energy and virial 
calculation. Because this leads * to different code paths, forces can be different. Thus for exact * continuation we should avoid extra log output. @@ -1207,7 +1200,7 @@ double gmx::do_md(FILE *fplog, t_commrec *cr, const gmx::MDLogger &mdlog, constr, enerd, fcd, state, &f, force_vir, mdatoms, nrnb, wcycle, graph, groups, - shellfc, fr, bBornRadii, t, mu_tot, + shellfc, fr, t, mu_tot, vsite, ddOpenBalanceRegion, ddCloseBalanceRegion); } @@ -1235,7 +1228,7 @@ double gmx::do_md(FILE *fplog, t_commrec *cr, const gmx::MDLogger &mdlog, state->box, state->x, &state->hist, f, force_vir, mdatoms, enerd, fcd, state->lambda, graph, - fr, vsite, mu_tot, t, ed, bBornRadii, + fr, vsite, mu_tot, t, ed, (bNS ? GMX_FORCE_NS : 0) | force_flags, ddOpenBalanceRegion, ddCloseBalanceRegion); } -- 2.22.0