From 93c6b6d68a85d5c2b0c28819b45461411f249efc Mon Sep 17 00:00:00 2001
From: Mark Abraham <mark.j.abraham@gmail.com>
Date: Mon, 15 Jan 2018 14:01:44 +0100
Subject: [PATCH] Remove support for implicit solvation

Mdp files with implicit-solvent = no can still be read, and formerly
valid related fields are now ignored, so that default mdp files from
previous versions of GROMACS will work. Anything else for the
implicit-solvent mdp value gives an error in grompp.

grompp can now only write a tpr file that has a false value for
ir->implicit_solvent, but can read older versions. When mdrun is
presented with an older tpr file that did such a simulation, it
refuses to run, presenting a useful error message. Such tpr files are
still useful for other purposes, so can still be read, except that the
fields specific to these methods are ignored.

grompp now ignores the topology directives for related parameters,
which means that force-field folders that are the same as, or
modifications of folders formerly supported by GROMACS still
work. However, the versions currently distributed have none of those
fields.

The group-scheme kernels have been removed, and generation
infrastructure updated so that they do generate the code that's in the
repo. However, now that the python generation scripts no longer
generate GB kernels, the dictionary ordering changes, which changes
the generated output. That output is not sensitive to the order of the
declarations or data-structure elements, so this is only a cosmetic
issue.

Documentation has been removed.

Unit tests on .mdp file handling have had to be updated.

Also removed unused enbcoul enumeration

Refs #1500
Refs #1971
Fixes #1054

Change-Id: Ib241555ff3d8e60012ba0e628ab0f9a3f91eca9e
---
 docs/manual/algorithms.tex                    |   68 -
 docs/manual/forcefield.tex                    |    2 +-
 docs/manual/topology.tex                      |   32 -
 docs/user-guide/cutoff-schemes.rst            |    1 -
 docs/user-guide/mdp-options.rst               |  100 +-
 docs/user-guide/mdrun-performance.rst         |    1 -
 docs/user-guide/terminology.rst               |    2 +-
 src/gromacs/domdec/domdec.cpp                 |    6 -
 src/gromacs/fileio/tpxio.cpp                  |  134 +-
 .../gmxlib/nonbonded/nb_free_energy.cpp       |    6 +-
 src/gromacs/gmxlib/nonbonded/nb_generic.cpp   |    7 +-
 src/gromacs/gmxlib/nonbonded/nb_kernel.h      |    1 -
 .../make_nb_kernel_avx_128_fma_double.py      |    6 +-
 ...cGB_VdwCSTab_GeomP1P1_avx_128_fma_double.c |  858 ---------
 ...ElecGB_VdwLJ_GeomP1P1_avx_128_fma_double.c |  736 -------
 ...ecGB_VdwNone_GeomP1P1_avx_128_fma_double.c |  665 -------
 .../nb_kernel_avx_128_fma_double.c            |  256 ++-
 .../nb_kernel_template_avx_128_fma_double.pre |  115 +-
 .../make_nb_kernel_avx_128_fma_single.py      |    6 +-
 ...cGB_VdwCSTab_GeomP1P1_avx_128_fma_single.c |  978 ----------
 ...ElecGB_VdwLJ_GeomP1P1_avx_128_fma_single.c |  864 ---------
 ...ecGB_VdwNone_GeomP1P1_avx_128_fma_single.c |  769 --------
 .../nb_kernel_avx_128_fma_single.c            |  256 ++-
 .../nb_kernel_template_avx_128_fma_single.pre |  111 +-
 .../make_nb_kernel_avx_256_double.py          |    6 +-
 ..._ElecGB_VdwCSTab_GeomP1P1_avx_256_double.c |  972 ----------
 ...nel_ElecGB_VdwLJ_GeomP1P1_avx_256_double.c |  870 ---------
 ...l_ElecGB_VdwNone_GeomP1P1_avx_256_double.c |  775 --------
 .../nb_kernel_avx_256_double.c                |  256 ++-
 .../nb_kernel_template_avx_256_double.pre     |  107 +-
 .../make_nb_kernel_avx_256_single.py          |    6 +-
 ..._ElecGB_VdwCSTab_GeomP1P1_avx_256_single.c | 1192 ------------
 ...nel_ElecGB_VdwLJ_GeomP1P1_avx_256_single.c | 1038 ----------
 ...l_ElecGB_VdwNone_GeomP1P1_avx_256_single.c |  911 ---------
 .../nb_kernel_avx_256_single.c                |  256 ++-
 .../nb_kernel_template_avx_256_single.pre     |  126 +-
 .../nonbonded/nb_kernel_c/make_nb_kernel_c.py |    6 +-
 .../nb_kernel_ElecGB_VdwBham_GeomP1P1_c.c     |  505 -----
 .../nb_kernel_ElecGB_VdwCSTab_GeomP1P1_c.c    |  545 ------
 .../nb_kernel_ElecGB_VdwLJ_GeomP1P1_c.c       |  501 -----
 .../nb_kernel_ElecGB_VdwNone_GeomP1P1_c.c     |  459 -----
 .../nb_kernel_c/nb_kernel_allvsallgb.cpp      |  527 -----
 .../nb_kernel_c/nb_kernel_allvsallgb.h        |   55 -
 .../nonbonded/nb_kernel_c/nb_kernel_c.c       |  338 ++--
 .../nb_kernel_c/nb_kernel_template_c.pre      |   81 +-
 .../make_nb_kernel_sparc64_hpc_ace_double.py  |    6 +-
 ...VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.c |  820 --------
 ...GB_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.c |  706 -------
 ..._VdwNone_GeomP1P1_sparc64_hpc_ace_double.c |  633 ------
 .../nb_kernel_sparc64_hpc_ace_double.c        |  256 ++-
 ...kernel_template_sparc64_hpc_ace_double.pre |  107 +-
 .../make_nb_kernel_sse2_double.py             |    6 +-
 ...nel_ElecGB_VdwCSTab_GeomP1P1_sse2_double.c |  838 --------
 ...kernel_ElecGB_VdwLJ_GeomP1P1_sse2_double.c |  728 -------
 ...rnel_ElecGB_VdwNone_GeomP1P1_sse2_double.c |  657 -------
 .../nb_kernel_sse2_double.c                   |  256 ++-
 .../nb_kernel_template_sse2_double.pre        |  111 +-
 .../make_nb_kernel_sse2_single.py             |    6 +-
 ...nel_ElecGB_VdwCSTab_GeomP1P1_sse2_single.c |  958 ---------
 ...kernel_ElecGB_VdwLJ_GeomP1P1_sse2_single.c |  856 --------
 ...rnel_ElecGB_VdwNone_GeomP1P1_sse2_single.c |  761 --------
 .../nb_kernel_sse2_single.c                   |  256 ++-
 .../nb_kernel_template_sse2_single.pre        |  107 +-
 .../make_nb_kernel_sse4_1_double.py           |    6 +-
 ...l_ElecGB_VdwCSTab_GeomP1P1_sse4_1_double.c |  838 --------
 ...rnel_ElecGB_VdwLJ_GeomP1P1_sse4_1_double.c |  728 -------
 ...el_ElecGB_VdwNone_GeomP1P1_sse4_1_double.c |  657 -------
 .../nb_kernel_sse4_1_double.c                 |  256 ++-
 .../nb_kernel_template_sse4_1_double.pre      |  111 +-
 .../make_nb_kernel_sse4_1_single.py           |    6 +-
 ...l_ElecGB_VdwCSTab_GeomP1P1_sse4_1_single.c |  954 ---------
 ...rnel_ElecGB_VdwLJ_GeomP1P1_sse4_1_single.c |  852 --------
 ...el_ElecGB_VdwNone_GeomP1P1_sse4_1_single.c |  757 --------
 .../nb_kernel_sse4_1_single.c                 |  256 ++-
 .../nb_kernel_template_sse4_1_single.pre      |  106 +-
 src/gromacs/gmxlib/nonbonded/nonbonded.cpp    |    3 +-
 src/gromacs/gmxlib/nrnb.cpp                   |   14 +-
 src/gromacs/gmxlib/nrnb.h                     |   12 +-
 src/gromacs/gmxpreprocess/convparm.cpp        |   34 +-
 src/gromacs/gmxpreprocess/gpp_atomtype.cpp    |  156 +-
 src/gromacs/gmxpreprocess/gpp_atomtype.h      |   20 +-
 src/gromacs/gmxpreprocess/grompp.cpp          |   87 +-
 src/gromacs/gmxpreprocess/nm2type.cpp         |    4 +-
 src/gromacs/gmxpreprocess/readir.cpp          |  117 +-
 src/gromacs/gmxpreprocess/resall.cpp          |    4 +-
 src/gromacs/gmxpreprocess/tests/readir.cpp    |   12 +
 .../refdata/GetIrTest_EmptyInputWorks.xml     |   26 +-
 ...IrTest_HandlesDifferentKindsOfMdpLines.xml |   26 +-
 .../GetIrTest_HandlesOnlyCutoffScheme.xml     |   26 +-
 .../GetIrTest_ImplicitSolventNoWorks.xml      |  321 +++
 ...IrTest_ProducesOutputFromElectricField.xml |   26 +-
 ...ucesOutputFromElectricFieldOscillating.xml |   26 +-
 ..._ProducesOutputFromElectricFieldPulsed.xml |   26 +-
 .../GetIrTest_UserErrorsSilentlyTolerated.xml |   26 +-
 src/gromacs/gmxpreprocess/topdirs.cpp         |    4 +-
 src/gromacs/gmxpreprocess/topio.cpp           |  199 +-
 src/gromacs/gmxpreprocess/topio.h             |    3 +-
 src/gromacs/gmxpreprocess/toppush.cpp         |   82 +-
 src/gromacs/gmxpreprocess/toppush.h           |    7 +-
 src/gromacs/gmxpreprocess/toputil.cpp         |    4 +-
 src/gromacs/listed-forces/listed-forces.cpp   |    5 +-
 src/gromacs/mdlib/broadcaststructs.cpp        |   18 +-
 src/gromacs/mdlib/force.cpp                   |   37 +-
 src/gromacs/mdlib/force.h                     |    6 +-
 src/gromacs/mdlib/forcerec.cpp                |   68 +-
 src/gromacs/mdlib/forcerec.h                  |    4 +-
 src/gromacs/mdlib/genborn.cpp                 | 1713 -----------------
 src/gromacs/mdlib/genborn.h                   |  170 --
 src/gromacs/mdlib/genborn_allvsall.cpp        | 1108 -----------
 src/gromacs/mdlib/genborn_allvsall.h          |   73 -
 src/gromacs/mdlib/mdebin.cpp                  |   14 +-
 src/gromacs/mdlib/minimize.cpp                |    7 +-
 src/gromacs/mdlib/shellfc.cpp                 |    7 +-
 src/gromacs/mdlib/shellfc.h                   |    3 +-
 src/gromacs/mdlib/sim_util.cpp                |   22 +-
 src/gromacs/mdlib/tpi.cpp                     |    4 +-
 src/gromacs/mdtypes/forcerec.h                |   50 +-
 src/gromacs/mdtypes/inputrec.cpp              |   33 +-
 src/gromacs/mdtypes/inputrec.h                |   15 +-
 src/gromacs/mdtypes/md_enums.cpp              |   18 +-
 src/gromacs/mdtypes/md_enums.h                |   37 +-
 src/gromacs/tables/forcetable.cpp             |   70 +-
 src/gromacs/tables/forcetable.h               |    9 +-
 src/gromacs/timing/wallcycle.cpp              |    2 +-
 src/gromacs/timing/wallcycle.h                |    4 +-
 src/gromacs/topology/atoms.cpp                |   17 +-
 src/gromacs/topology/atoms.h                  |    7 +-
 src/gromacs/topology/idef.cpp                 |   14 +-
 src/gromacs/topology/idef.h                   |   15 +-
 src/gromacs/topology/ifunc.cpp                |   12 +-
 src/programs/mdrun/md.cpp                     |   13 +-
 131 files changed, 1870 insertions(+), 32077 deletions(-)
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_128_fma_double.c
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_128_fma_double.c
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_128_fma_double.c
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_128_fma_single.c
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_128_fma_single.c
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_128_fma_single.c
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_256_double.c
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_256_double.c
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_256_double.c
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_256_single.c
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_256_single.c
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_256_single.c
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwBham_GeomP1P1_c.c
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_c.c
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwLJ_GeomP1P1_c.c
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwNone_GeomP1P1_c.c
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsallgb.cpp
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsallgb.h
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.c
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.c
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_sparc64_hpc_ace_double.c
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse2_double.c
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sse2_double.c
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_sse2_double.c
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse2_single.c
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sse2_single.c
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_sse2_single.c
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse4_1_double.c
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sse4_1_double.c
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_sse4_1_double.c
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse4_1_single.c
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sse4_1_single.c
 delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_sse4_1_single.c
 create mode 100644 src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_ImplicitSolventNoWorks.xml
 delete mode 100644 src/gromacs/mdlib/genborn.cpp
 delete mode 100644 src/gromacs/mdlib/genborn.h
 delete mode 100644 src/gromacs/mdlib/genborn_allvsall.cpp
 delete mode 100644 src/gromacs/mdlib/genborn_allvsall.h

diff --git a/docs/manual/algorithms.tex b/docs/manual/algorithms.tex
index 514ae907c0..be04de79c9 100644
--- a/docs/manual/algorithms.tex
+++ b/docs/manual/algorithms.tex
@@ -3051,74 +3051,6 @@ and separate PME-mesh ranks.
 \end{figure}
 
 
-\section{Implicit solvation\index{implicit solvation}\index{Generalized Born methods}}
-\label{sec:gbsa}
-Implicit solvent models provide an efficient way of representing 
-the electrostatic effects of solvent molecules, while saving a 
-large piece of the computations involved in an accurate, aqueous 
-description of the surrounding water in molecular dynamics simulations. 
-Implicit solvation models offer several advantages compared with 
-explicit solvation, including eliminating the need for the equilibration of water 
-around the solute, and the absence of viscosity, which allows the protein 
-to more quickly explore conformational space.
-
-Implicit solvent calculations in {\gromacs} can be done using the 
-generalized Born-formalism, and the Still~\cite{Still97}, HCT~\cite{Truhlar96}, 
-and OBC~\cite{Case04} models are available for calculating the Born radii.
-
-Here, the free energy $G_{\mathrm{solv}}$ of solvation is the sum of three terms,
-a solvent-solvent cavity term ($G_{\mathrm{cav}}$), a solute-solvent van der
-Waals term ($G_{\mathrm{vdw}}$), and finally a solvent-solute electrostatics
-polarization term ($G_{\mathrm{pol}}$).
-
-The sum of $G_{\mathrm{cav}}$ and $G_{\mathrm{vdw}}$ corresponds to the (non-polar)
-free energy of solvation for a molecule from which all charges
-have been removed, and is commonly called $G_{\mathrm{np}}$,
-calculated from the total solvent accessible surface area 
-multiplied with a surface tension. 
-The total expression for the solvation free energy then becomes:
-
-\beq
-G_{\mathrm{solv}} = G_{\mathrm{np}}  + G_{\mathrm{pol}}
-\label{eqn:gb_solv}
-\eeq
-
-Under the generalized Born model, $G_{\mathrm{pol}}$ is calculated from the generalized Born equation~\cite{Still97}:
-
-\beq
-G_{\mathrm{pol}} = \left(1-\frac{1}{\epsilon}\right) \sum_{i=1}^n \sum_{j>i}^n \frac {q_i q_j}{\sqrt{r^2_{ij} + b_i b_j \exp\left(\frac{-r^2_{ij}}{4 b_i b_j}\right)}}
-\label{eqn:gb_still}
-\eeq
-
-In {\gromacs}, we have introduced the substitution~\cite{Larsson10}:
-
-\beq
-c_i=\frac{1}{\sqrt{b_i}}
-\label{eqn:gb_subst}
-\eeq
-
-which makes it possible to introduce a cheap transformation to a new 
-variable $x$ when evaluating each interaction, such that:
-
-\beq
-x=\frac{r_{ij}}{\sqrt{b_i b_j }} = r_{ij} c_i c_j
-\label{eqn:gb_subst2}
-\eeq
-
-In the end, the full re-formulation of~\ref{eqn:gb_still} becomes:
- 
-\beq
-G_{\mathrm{pol}} = \left(1-\frac{1}{\epsilon}\right) \sum_{i=1}^n \sum_{j>i}^n \frac{q_i q_j}{\sqrt{b_i  b_j}} ~\xi (x) = \left(1-\frac{1}{\epsilon}\right) \sum_{i=1}^n q_i c_i \sum_{j>i}^n q_j c_j~\xi (x)
-\label{eqn:gb_final}
-\eeq 
-
-The non-polar part ($G_{\mathrm{np}}$) of Equation~\ref{eqn:gb_solv} is calculated 
-directly from the Born radius of each atom using a simple ACE type 
-approximation by Schaefer {\em et al.}~\cite{Karplus98}, including a 
-simple loop over all atoms. 
-This requires only one extra solvation parameter, independent of atom type, 
-but differing slightly between the three Born radii models.
-
 % LocalWords:  GROningen MAchine BIOSON Groningen GROMACS Berendsen der Spoel
 % LocalWords:  Drunen Comp Phys Comm ROck NS FFT pbc EM ifthenelse gmxlite ff
 % LocalWords:  octahedra triclinic Ewald PME PPPM trjconv xy solvated
diff --git a/docs/manual/forcefield.tex b/docs/manual/forcefield.tex
index f50da64e64..0445b73d74 100644
--- a/docs/manual/forcefield.tex
+++ b/docs/manual/forcefield.tex
@@ -2891,7 +2891,7 @@ of the blocks. {\bf Note} that all {\gromacs} programs can read compressed
 \subsection{CHARMM\index{CHARMM force field}}
 \label{subsec:charmmff}
 
-{\gromacs} supports the CHARMM force field for proteins~\cite{mackerell04, mackerell98}, lipids~\cite{feller00} and nucleic acids~\cite{foloppe00,Mac2000}. The protein parameters (and to some extent the lipid and nucleic acid parameters) were thoroughly tested -- both by comparing potential energies between the port and the standard parameter set in the CHARMM molecular simulation package, as well by how the protein force field behaves together with {\gromacs}-specific techniques such as virtual sites (enabling long time steps) and a fast implicit solvent recently implemented~\cite{Larsson10} -- and the details and results are presented in the paper by Bjelkmar et al.~\cite{Bjelkmar10}. The nucleic acid parameters, as well as the ones for HEME, were converted and tested by Michel Cuendet.
+{\gromacs} supports the CHARMM force field for proteins~\cite{mackerell04, mackerell98}, lipids~\cite{feller00} and nucleic acids~\cite{foloppe00,Mac2000}. The protein parameters (and to some extent the lipid and nucleic acid parameters) were thoroughly tested -- both by comparing potential energies between the port and the standard parameter set in the CHARMM molecular simulation package, as well as by how the protein force field behaves together with {\gromacs}-specific techniques such as virtual sites (enabling long time steps) recently implemented~\cite{Larsson10} -- and the details and results are presented in the paper by Bjelkmar et al.~\cite{Bjelkmar10}. The nucleic acid parameters, as well as the ones for HEME, were converted and tested by Michel Cuendet.
 
 When selecting the CHARMM force field in {\tt \normindex{pdb2gmx}} the default option is to use \normindex{CMAP} (for torsional correction map). To exclude CMAP, use {\tt -nocmap}. The basic form of the CMAP term implemented in {\gromacs} is a function of the $\phi$ and $\psi$ backbone torsion angles. This term is defined in the {\tt .rtp} file by a {\tt [ cmap ]} statement at the end of each residue supporting CMAP. The following five atom names define the two torsional angles. Atoms 1-4 define $\phi$, and atoms 2-5 define $\psi$. The corresponding atom types are then matched to the correct CMAP type in the {\tt cmap.itp} file that contains the correction maps.
 
diff --git a/docs/manual/topology.tex b/docs/manual/topology.tex
index 9336933c40..bfd0393b5a 100644
--- a/docs/manual/topology.tex
+++ b/docs/manual/topology.tex
@@ -557,37 +557,6 @@ to be excluded, is it more convenient and much more efficient to use
 energy monitor group exclusions (see \secref{groupconcept}).
 
 
-\section{Implicit solvation parameters\index{implicit solvation parameters}}
-Starting with {\gromacs} 4.5, implicit solvent is supported. A section in the
-topology has been introduced to list those parameters:
-
-{\small
-\begin{verbatim}
-[ implicit_genborn_params ]
-; Atomtype  sar     st   pi      gbr      hct
-NH1         0.155   1    1.028   0.17063  0.79 ; N
-N           0.155   1    1       0.155    0.79 ; Proline backbone N
-H           0.1     1    1       0.115    0.85 ; H
-CT1         0.180   1    1.276   0.190    0.72 ; C
-\end{verbatim}}
-
-In this example the atom type is listed first, followed by five
-numbers, and a comment (following a semicolon).
-
-Values in columns 1-3 are not currently used. They pertain to more
-elaborate surface area algorithms, the one from Qiu {\em et al.}~\cite{Still97} in
-particular.  Column 4 contains the atomic van der Waals radii, which are used
-in computing the Born radii. The dielectric offset is specified in
-the {\tt *.mdp} file, and gets subtracted from the input van der Waals radii for the different
-Born radii methods, as described by Onufriev {\em et al.}~\cite{Case04}.  Column 5 is the 
-scale factor for the HCT and OBC models. The values are taken from the Tinker implementation of 
-the HCT pairwise scaling method~\cite{Truhlar96}.  This method has been modified such that the
-scaling factors have been adjusted to minimize differences between analytical surface areas and
-GB using the HCT algorithm.  The scaling is further modified in that it is not applied pairwise
-as proposed by Hawkins {\em et al.}~\cite{Truhlar96}, but on a per-atom (rather than a per-pair) 
-basis.
-
-
 \section{Constraint algorithms\index{constraint algorithms}}
 \label{sec:constraints}
 Constraints are defined in the {\tt [~constraints~]} section.
@@ -1463,7 +1432,6 @@ its content. That content can be seen in
 
 #include "ffnonbonded.itp"
 #include "ffbonded.itp"
-#include "gbsa.itp"
 \end{verbatim}}
 
 The two {\tt \#define} statements set up the conditions so that
diff --git a/docs/user-guide/cutoff-schemes.rst b/docs/user-guide/cutoff-schemes.rst
index 5253b8fcc0..e78c35d251 100644
--- a/docs/user-guide/cutoff-schemes.rst
+++ b/docs/user-guide/cutoff-schemes.rst
@@ -57,7 +57,6 @@ force-switch interactions             yes          yes
 switched potential                    yes          yes
 switched forces                       yes          yes
 non-periodic systems                  yes          Z + walls
-implicit solvent                      yes          no
 free energy perturbed non-bondeds     yes          yes
 energy group contributions            yes          only on CPU
 energy group exclusions               yes          no
diff --git a/docs/user-guide/mdp-options.rst b/docs/user-guide/mdp-options.rst
index 66d210a56d..096cb4cbb6 100644
--- a/docs/user-guide/mdp-options.rst
+++ b/docs/user-guide/mdp-options.rst
@@ -3102,100 +3102,6 @@ Mixed quantum/classical molecular dynamics
       CASSCF method.
 
 
-Implicit solvent
-^^^^^^^^^^^^^^^^
-
-.. mdp:: implicit-solvent
-
-   .. mdp-value:: no
-
-      No implicit solvent
-
-   .. mdp-value:: GBSA
-
-      Do a simulation with implicit solvent using the Generalized Born
-      formalism. Three different methods for calculating the Born
-      radii are available, Still, HCT and OBC. These are specified
-      with the :mdp:`gb-algorithm` field. The non-polar solvation is
-      specified with the :mdp:`sa-algorithm` field.
-
-.. mdp:: gb-algorithm
-
-   .. mdp-value:: Still
-
-      Use the Still method to calculate the Born radii
-
-   .. mdp-value:: HCT
-
-      Use the Hawkins-Cramer-Truhlar method to calculate the Born
-      radii
-
-   .. mdp-value:: OBC
-
-      Use the Onufriev-Bashford-Case method to calculate the Born
-      radii
-
-.. mdp:: nstgbradii
-
-   (1) \[steps\]
-   Frequency to (re)-calculate the Born radii. For most practial
-   purposes, setting a value larger than 1 violates energy
-   conservation and leads to unstable trajectories.
-
-.. mdp:: rgbradii
-
-   (1.0) \[nm\]
-   Cut-off for the calculation of the Born radii. Currently must be
-   equal to rlist
-
-.. mdp:: gb-epsilon-solvent
-
-   (80)
-   Dielectric constant for the implicit solvent
-
-.. mdp:: gb-saltconc
-
-   (0) \[M\]
-   Salt concentration for implicit solvent models, currently not used
-
-.. mdp:: gb-obc-alpha
-.. mdp:: gb-obc-beta
-.. mdp:: gb-obc-gamma
-
-   Scale factors for the OBC model. Default values of 1, 0.78 and 4.85
-   respectively are for OBC(II). Values for OBC(I) are 0.8, 0 and 2.91
-   respectively
-
-.. mdp:: gb-dielectric-offset
-
-   (0.009) \[nm\]
-   Distance for the di-electric offset when calculating the Born
-   radii. This is the offset between the center of each atom the
-   center of the polarization energy for the corresponding atom
-
-.. mdp:: sa-algorithm
-
-   .. mdp-value:: Ace-approximation
-
-      Use an Ace-type approximation
-
-   .. mdp-value:: None
-
-      No non-polar solvation calculation done. For GBSA only the polar
-      part gets calculated
-
-.. mdp:: sa-surface-tension
-
-   \[kJ mol-1 nm-2\]
-   Default value for surface tension with SA algorithms. The default
-   value is -1; Note that if this default value is not changed it will
-   be overridden by :ref:`gmx grompp` using values that are specific
-   for the choice of radii algorithm (0.0049 kcal/mol/Angstrom^2 for
-   Still, 0.0054 kcal/mol/Angstrom2 for HCT/OBC) Setting it to 0 will
-   while using an sa-algorithm other than None means no non-polar
-   calculations are done.
-
-
 Computational Electrophysiology
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 Use these options to switch on and control ion/water position exchanges in "Computational
@@ -3344,7 +3250,7 @@ User defined thingies
 Removed features
 ^^^^^^^^^^^^^^^^
 
-This feature has been removed from |Gromacs|, but so that old
+These features have been removed from |Gromacs|, but so that old
 :ref:`mdp` and :ref:`tpr` files cannot be mistakenly misused, we still
 parse this option. :ref:`gmx grompp` and :ref:`gmx mdrun` will issue a
 fatal error if this is set.
@@ -3353,4 +3259,8 @@ fatal error if this is set.
 
    (no)
 
+.. mdp:: implicit-solvent
+
+   (no)
+
 .. _reference manual: gmx-manual-parent-dir_
diff --git a/docs/user-guide/mdrun-performance.rst b/docs/user-guide/mdrun-performance.rst
index f460f6f4d6..859fc5f068 100644
--- a/docs/user-guide/mdrun-performance.rst
+++ b/docs/user-guide/mdrun-performance.rst
@@ -577,7 +577,6 @@ The performance counters are:
 * Neighbor search
 * Launch GPU operations
 * Communication of coordinates
-* Born radii
 * Force
 * Waiting + Communication of force
 * Particle mesh Ewald
diff --git a/docs/user-guide/terminology.rst b/docs/user-guide/terminology.rst
index e5ad730331..29eae4f5ef 100644
--- a/docs/user-guide/terminology.rst
+++ b/docs/user-guide/terminology.rst
@@ -333,7 +333,7 @@ when addressing such a scenario:
    to a breakdown in the model physics, even if the starting configuration of
    the system is reasonable.
 
-If using implicit solvation, starting your equilibration with a smaller time
+When using no explicit solvent, starting your equilibration with a smaller time
 step than your production run can help energy equipartition more stably.
 
 There are several common situations in which instability frequently arises,
diff --git a/src/gromacs/domdec/domdec.cpp b/src/gromacs/domdec/domdec.cpp
index 33745e1be4..bb90e31364 100644
--- a/src/gromacs/domdec/domdec.cpp
+++ b/src/gromacs/domdec/domdec.cpp
@@ -67,7 +67,6 @@
 #include "gromacs/mdlib/constr.h"
 #include "gromacs/mdlib/force.h"
 #include "gromacs/mdlib/forcerec.h"
-#include "gromacs/mdlib/genborn.h"
 #include "gromacs/mdlib/gmx_omp_nthreads.h"
 #include "gromacs/mdlib/mdatoms.h"
 #include "gromacs/mdlib/mdrun.h"
@@ -9755,11 +9754,6 @@ void dd_partition_system(FILE                *fplog,
     mdAlgorithmsSetupAtomData(cr, ir, top_global, top_local, fr,
                               nullptr, mdAtoms, vsite, nullptr);
 
-    if (ir->implicit_solvent)
-    {
-        make_local_gb(cr, fr->born, ir->gb_algorithm);
-    }
-
     auto mdatoms = mdAtoms->mdatoms();
     if (!thisRankHasDuty(cr, DUTY_PME))
     {
diff --git a/src/gromacs/fileio/tpxio.cpp b/src/gromacs/fileio/tpxio.cpp
index 6e3db2cd43..013718e7d4 100644
--- a/src/gromacs/fileio/tpxio.cpp
+++ b/src/gromacs/fileio/tpxio.cpp
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -118,6 +118,7 @@ enum tpxv {
     tpxv_PullExternalPotential,                              /**< Added pull type external potential */
     tpxv_GenericParamsForElectricField,                      /**< Introduced KeyValueTree and moved electric field parameters */
     tpxv_AcceleratedWeightHistogram,                         /**< sampling with accelerated weight histogram method (AWH) */
+    tpxv_RemoveImplicitSolvation,                            /**< removed support for implicit solvation */
     tpxv_Count                                               /**< the total number of tpxv versions */
 };
 
@@ -164,9 +165,18 @@ typedef struct {
 } t_ftupd;
 
 /*
+ * TODO The following three lines make little sense, please clarify if
+ * you've had to work out how ftupd works.
+ *
  * The entries should be ordered in:
  * 1. ascending function type number
  * 2. ascending file version number
+ *
+ * Because we support reading of old .tpr file versions (even when
+ * mdrun can no longer run the simulation), we need to be able to read
+ * obsolete t_interaction_function types. Any data read from such
+ * fields is discarded. Their names have _NOLONGERUSED appended to
+ * them to make things clear.
  */
 static const t_ftupd ftupd[] = {
     { 34, F_FENEBONDS         },
@@ -181,11 +191,11 @@ static const t_ftupd ftupd[] = {
     { tpxv_RestrictedBendingAndCombinedAngleTorsionPotentials, F_CBTDIHS },
     { 43, F_TABDIHS           },
     { 65, F_CMAP              },
-    { 60, F_GB12              },
-    { 61, F_GB13              },
-    { 61, F_GB14              },
-    { 72, F_GBPOL             },
-    { 72, F_NPSOLVATION       },
+    { 60, F_GB12_NOLONGERUSED },
+    { 61, F_GB13_NOLONGERUSED },
+    { 61, F_GB14_NOLONGERUSED },
+    { 72, F_GBPOL_NOLONGERUSED },
+    { 72, F_NPSOLVATION_NOLONGERUSED },
     { 41, F_LJC14_Q           },
     { 41, F_LJC_PAIRS_NB      },
     { 32, F_BHAM_LR_NOLONGERUSED },
@@ -1268,54 +1278,38 @@ static void do_inputrec(t_fileio *fio, t_inputrec *ir, gmx_bool bRead,
     }
     gmx_fio_do_real(fio, ir->tabext);
 
-    gmx_fio_do_int(fio, ir->gb_algorithm);
-    gmx_fio_do_int(fio, ir->nstgbradii);
-    gmx_fio_do_real(fio, ir->rgbradii);
-    gmx_fio_do_real(fio, ir->gb_saltconc);
-    gmx_fio_do_int(fio, ir->implicit_solvent);
-    if (file_version >= 55)
+    // This permits reading a .tpr file that used implicit solvent,
+    // and later permits mdrun to refuse to run it.
+    if (bRead)
     {
-        gmx_fio_do_real(fio, ir->gb_epsilon_solvent);
-        gmx_fio_do_real(fio, ir->gb_obc_alpha);
-        gmx_fio_do_real(fio, ir->gb_obc_beta);
-        gmx_fio_do_real(fio, ir->gb_obc_gamma);
-        if (file_version >= 60)
+        if (file_version < tpxv_RemoveImplicitSolvation)
         {
-            gmx_fio_do_real(fio, ir->gb_dielectric_offset);
-            gmx_fio_do_int(fio, ir->sa_algorithm);
+            gmx_fio_do_int(fio, idum);
+            gmx_fio_do_int(fio, idum);
+            gmx_fio_do_real(fio, rdum);
+            gmx_fio_do_real(fio, rdum);
+            gmx_fio_do_int(fio, idum);
+            ir->implicit_solvent = (idum > 0);
         }
         else
         {
-            ir->gb_dielectric_offset = 0.009;
-            ir->sa_algorithm         = esaAPPROX;
+            ir->implicit_solvent = false;
         }
-        gmx_fio_do_real(fio, ir->sa_surface_tension);
-
-        /* Override sa_surface_tension if it is not changed in the mpd-file */
-        if (ir->sa_surface_tension < 0)
+        if (file_version >= 55 && file_version < tpxv_RemoveImplicitSolvation)
         {
-            if (ir->gb_algorithm == egbSTILL)
-            {
-                ir->sa_surface_tension = 0.0049 * 100 * CAL2JOULE;
-            }
-            else if (ir->gb_algorithm == egbHCT || ir->gb_algorithm == egbOBC)
+            gmx_fio_do_real(fio, rdum);
+            gmx_fio_do_real(fio, rdum);
+            gmx_fio_do_real(fio, rdum);
+            gmx_fio_do_real(fio, rdum);
+            if (file_version >= 60)
             {
-                ir->sa_surface_tension = 0.0054 * 100 * CAL2JOULE;
+                gmx_fio_do_real(fio, rdum);
+                gmx_fio_do_int(fio, idum);
             }
+            gmx_fio_do_real(fio, rdum);
         }
-
-    }
-    else
-    {
-        /* Better use sensible values than insane (0.0) ones... */
-        ir->gb_epsilon_solvent = 80;
-        ir->gb_obc_alpha       = 1.0;
-        ir->gb_obc_beta        = 0.8;
-        ir->gb_obc_gamma       = 4.85;
-        ir->sa_surface_tension = 2.092;
     }
 
-
     if (file_version >= 81)
     {
         gmx_fio_do_real(fio, ir->fourier_spacing);
@@ -2119,22 +2113,28 @@ static void do_iparams(t_fileio *fio, t_functype ftype, t_iparams *iparams,
             gmx_fio_do_int(fio, iparams->vsiten.n);
             gmx_fio_do_real(fio, iparams->vsiten.a);
             break;
-        case F_GB12:
-        case F_GB13:
-        case F_GB14:
-            /* We got rid of some parameters in version 68 */
-            if (bRead && file_version < 68)
+        case F_GB12_NOLONGERUSED:
+        case F_GB13_NOLONGERUSED:
+        case F_GB14_NOLONGERUSED:
+            // Implicit solvent parameters can still be read, but never used
+            if (bRead)
             {
-                gmx_fio_do_real(fio, rdum);
-                gmx_fio_do_real(fio, rdum);
-                gmx_fio_do_real(fio, rdum);
-                gmx_fio_do_real(fio, rdum);
+                if (file_version < 68)
+                {
+                    gmx_fio_do_real(fio, rdum);
+                    gmx_fio_do_real(fio, rdum);
+                    gmx_fio_do_real(fio, rdum);
+                    gmx_fio_do_real(fio, rdum);
+                }
+                if (file_version < tpxv_RemoveImplicitSolvation)
+                {
+                    gmx_fio_do_real(fio, rdum);
+                    gmx_fio_do_real(fio, rdum);
+                    gmx_fio_do_real(fio, rdum);
+                    gmx_fio_do_real(fio, rdum);
+                    gmx_fio_do_real(fio, rdum);
+                }
             }
-            gmx_fio_do_real(fio, iparams->gb.sar);
-            gmx_fio_do_real(fio, iparams->gb.st);
-            gmx_fio_do_real(fio, iparams->gb.pi);
-            gmx_fio_do_real(fio, iparams->gb.gbr);
-            gmx_fio_do_real(fio, iparams->gb.bmlt);
             break;
         case F_CMAP:
             gmx_fio_do_int(fio, iparams->cmap.cmapA);
@@ -2631,24 +2631,24 @@ static void do_atomtypes(t_fileio *fio, t_atomtypes *atomtypes, gmx_bool bRead,
     j = atomtypes->nr;
     if (bRead)
     {
-        snew(atomtypes->radius, j);
-        snew(atomtypes->vol, j);
-        snew(atomtypes->surftens, j);
         snew(atomtypes->atomnumber, j);
-        snew(atomtypes->gb_radius, j);
-        snew(atomtypes->S_hct, j);
     }
-    gmx_fio_ndo_real(fio, atomtypes->radius, j);
-    gmx_fio_ndo_real(fio, atomtypes->vol, j);
-    gmx_fio_ndo_real(fio, atomtypes->surftens, j);
+    if (bRead && file_version < tpxv_RemoveImplicitSolvation)
+    {
+        std::vector<real> dummy(atomtypes->nr, 0);
+        gmx_fio_ndo_real(fio, dummy.data(), dummy.size());
+        gmx_fio_ndo_real(fio, dummy.data(), dummy.size());
+        gmx_fio_ndo_real(fio, dummy.data(), dummy.size());
+    }
     if (file_version >= 40)
     {
         gmx_fio_ndo_int(fio, atomtypes->atomnumber, j);
     }
-    if (file_version >= 60)
+    if (bRead && file_version >= 60 && file_version < tpxv_RemoveImplicitSolvation)
     {
-        gmx_fio_ndo_real(fio, atomtypes->gb_radius, j);
-        gmx_fio_ndo_real(fio, atomtypes->S_hct, j);
+        std::vector<real> dummy(atomtypes->nr, 0);
+        gmx_fio_ndo_real(fio, dummy.data(), dummy.size());
+        gmx_fio_ndo_real(fio, dummy.data(), dummy.size());
     }
 }
 
diff --git a/src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp b/src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp
index 381e49c69f..a3c495c885 100644
--- a/src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp
+++ b/src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -568,10 +568,6 @@ gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
                                     FscalC[i]  = -qq[i]*tabscale*FF*rC;
                                     break;
 
-                                case GMX_NBKERNEL_ELEC_GENERALIZEDBORN:
-                                    gmx_fatal(FARGS, "Free energy and GB not implemented.\n");
-                                    break;
-
                                 case GMX_NBKERNEL_ELEC_EWALD:
                                     if (bConvertEwaldToCoulomb)
                                     {
diff --git a/src/gromacs/gmxlib/nonbonded/nb_generic.cpp b/src/gromacs/gmxlib/nonbonded/nb_generic.cpp
index 499b9f5d2d..b9ca9c7753 100644
--- a/src/gromacs/gmxlib/nonbonded/nb_generic.cpp
+++ b/src/gromacs/gmxlib/nonbonded/nb_generic.cpp
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2012,2014,2015,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2014,2015,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -293,11 +293,6 @@ gmx_nb_generic_kernel(t_nblist *                nlist,
                         felec            = -qq*FF*tabscale*rinv;
                         break;
 
-                    case GMX_NBKERNEL_ELEC_GENERALIZEDBORN:
-                        /* GB */
-                        gmx_fatal(FARGS, "Death & horror! GB generic interaction not implemented.\n");
-                        break;
-
                     case GMX_NBKERNEL_ELEC_EWALD:
                         ewrt             = rsq*rinv*ewtabscale;
                         ewitab           = ewrt;
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel.h b/src/gromacs/gmxlib/nonbonded/nb_kernel.h
index d4f7ca31df..ba297da51e 100644
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel.h
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel.h
@@ -70,7 +70,6 @@ typedef struct
     /* potentials */
     real *             energygrp_elec;
     real *             energygrp_vdw;
-    real *             energygrp_polarization;
 }
 nb_kernel_data_t;
 
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/make_nb_kernel_avx_128_fma_double.py b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/make_nb_kernel_avx_128_fma_double.py
index 6c87a80af0..a0dd91f741 100755
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/make_nb_kernel_avx_128_fma_double.py
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/make_nb_kernel_avx_128_fma_double.py
@@ -2,7 +2,7 @@
 #
 # This file is part of the GROMACS molecular simulation package.
 #
-# Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
+# Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 # and including many others, as listed in the AUTHORS file in the
 # top-level source directory and at http://www.gromacs.org.
@@ -105,7 +105,6 @@ ElectrostaticsList = {
     'None'                    : [],
     'Coulomb'                 : ['rinv','rinvsq'],
     'ReactionField'           : ['rinv','rinvsq'],
-    'GeneralizedBorn'         : ['rinv','r'],
     'CubicSplineTable'        : ['rinv','r','table'],
     'Ewald'                   : ['rinv','rinvsq','r'],
 }
@@ -190,7 +189,6 @@ Abbreviation = {
     'Coulomb'                 : 'Coul',
     'Ewald'                   : 'Ew',
     'ReactionField'           : 'RF',
-    'GeneralizedBorn'         : 'GB',
     'CubicSplineTable'        : 'CSTab',
     'LennardJones'            : 'LJ',
     'Buckingham'              : 'Bham',
@@ -269,7 +267,7 @@ def KeepKernel(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,Kernel
         return 0
 
     # No need for LJ-only water optimization, or water optimization with implicit solvent.
-    if('Water' in KernelGeom[0] and (KernelElec=='None' or 'GeneralizedBorn' in KernelElec)):
+    if('Water' in KernelGeom[0] and KernelElec=='None'):
         return 0
 
     # Non-matching table settings are pointless
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_128_fma_double.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_128_fma_double.c
deleted file mode 100644
index ac8fc85251..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_128_fma_double.c
+++ /dev/null
@@ -1,858 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS avx_128_fma_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_x86_avx_128_fma_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_128_fma_double
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_128_fma_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    __m128d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    __m128d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128d          velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128d          vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp;
-    __m128d          minushalf = _mm_set1_pd(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    __m128d          rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    __m128d          one_sixth   = _mm_set1_pd(1.0/6.0);
-    __m128d          one_twelfth = _mm_set1_pd(1.0/12.0);
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128d          rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    __m128d          dummy_mask,cutoff_mask;
-    __m128d          signbit   = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
-    __m128d          one     = _mm_set1_pd(1.0);
-    __m128d          two     = _mm_set1_pd(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_pd(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = _mm_set1_pd(kernel_data->table_vdw->scale);
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_pd(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm_setzero_pd();
-        fiy0             = _mm_setzero_pd();
-        fiz0             = _mm_setzero_pd();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_pd(facel,_mm_load1_pd(charge+inr+0));
-        isai0            = _mm_load1_pd(invsqrta+inr+0);
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _mm_setzero_pd();
-        vgbsum           = _mm_setzero_pd();
-        vvdwsum          = _mm_setzero_pd();
-        dvdasum          = _mm_setzero_pd();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_2ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = avx128fma_invsqrt_d(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_2real_swizzle_pd(charge+jnrA+0,charge+jnrB+0);
-            isaj0            = gmx_mm_load_2real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-            gmx_mm_load_2pair_swizzle_pd(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _mm_mul_pd(r00,vftabscale);
-            vfitab           = _mm_cvttpd_epi32(rt);
-#ifdef __XOP__
-            vfeps            = _mm_frcz_pd(rt);
-#else
-            vfeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-#endif
-            twovfeps         = _mm_add_pd(vfeps,vfeps);
-            vfitab           = _mm_slli_epi32(vfitab,3);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-#ifdef __XOP__
-            gbeps            = _mm_frcz_pd(rt);
-#else
-            gbeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-#endif
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,1) );
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,1) +2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Fp               = _mm_macc_pd(gbeps,_mm_macc_pd(gbeps,H,G),F);
-            VV               = _mm_macc_pd(gbeps,Fp,Y);
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            twogbeps         = _mm_add_pd(gbeps,gbeps);
-            FF               = _mm_macc_pd(_mm_macc_pd(twogbeps,H,G),gbeps,Fp);
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_macc_pd(fgb,r00,vgb));
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_2real_swizzle_pd(dvda+jnrA,dvda+jnrB,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_msub_pd(velec,rinv00,fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _mm_load_pd( vftab + _mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_pd( vftab + _mm_extract_epi32(vfitab,1) );
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( vftab + _mm_extract_epi32(vfitab,0) +2);
-            H                = _mm_load_pd( vftab + _mm_extract_epi32(vfitab,1) +2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Fp               = _mm_macc_pd(vfeps,_mm_macc_pd(H,vfeps,G),F);
-            VV               = _mm_macc_pd(vfeps,Fp,Y);
-            vvdw6            = _mm_mul_pd(c6_00,VV);
-            FF               = _mm_macc_pd(vfeps,_mm_macc_pd(twovfeps,H,G),Fp);
-            fvdw6            = _mm_mul_pd(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            vfitab           = _mm_add_epi32(vfitab,ifour);
-            Y                = _mm_load_pd( vftab + _mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_pd( vftab + _mm_extract_epi32(vfitab,1) );
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( vftab + _mm_extract_epi32(vfitab,0) +2);
-            H                = _mm_load_pd( vftab + _mm_extract_epi32(vfitab,1) +2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Fp               = _mm_macc_pd(vfeps,_mm_macc_pd(H,vfeps,G),F);
-            VV               = _mm_macc_pd(vfeps,Fp,Y);
-            vvdw12           = _mm_mul_pd(c12_00,VV);
-            FF               = _mm_macc_pd(vfeps,_mm_macc_pd(twovfeps,H,G),Fp);
-            fvdw12           = _mm_mul_pd(c12_00,FF);
-            vvdw             = _mm_add_pd(vvdw12,vvdw6);
-            fvdw             = _mm_xor_pd(signbit,_mm_mul_pd(_mm_add_pd(fvdw6,fvdw12),_mm_mul_pd(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _mm_add_pd(velecsum,velec);
-            vgbsum           = _mm_add_pd(vgbsum,vgb);
-            vvdwsum          = _mm_add_pd(vvdwsum,vvdw);
-
-            fscal            = _mm_add_pd(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _mm_macc_pd(dx00,fscal,fix0);
-            fiy0             = _mm_macc_pd(dy00,fscal,fiy0);
-            fiz0             = _mm_macc_pd(dz00,fscal,fiz0);
-            
-            gmx_mm_decrement_1rvec_2ptr_swizzle_pd(f+j_coord_offsetA,f+j_coord_offsetB,
-                                                   _mm_mul_pd(dx00,fscal),
-                                                   _mm_mul_pd(dy00,fscal),
-                                                   _mm_mul_pd(dz00,fscal));
-
-            /* Inner loop uses 95 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_1ptr_swizzle_pd(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = avx128fma_invsqrt_d(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = _mm_load_sd(charge+jnrA+0);
-            isaj0            = _mm_load_sd(invsqrta+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-            gmx_mm_load_1pair_swizzle_pd(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _mm_mul_pd(r00,vftabscale);
-            vfitab           = _mm_cvttpd_epi32(rt);
-#ifdef __XOP__
-            vfeps            = _mm_frcz_pd(rt);
-#else
-            vfeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-#endif
-            twovfeps         = _mm_add_pd(vfeps,vfeps);
-            vfitab           = _mm_slli_epi32(vfitab,3);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-#ifdef __XOP__
-            gbeps            = _mm_frcz_pd(rt);
-#else
-            gbeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-#endif
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Fp               = _mm_macc_pd(gbeps,_mm_macc_pd(gbeps,H,G),F);
-            VV               = _mm_macc_pd(gbeps,Fp,Y);
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            twogbeps         = _mm_add_pd(gbeps,gbeps);
-            FF               = _mm_macc_pd(_mm_macc_pd(twogbeps,H,G),gbeps,Fp);
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_macc_pd(fgb,r00,vgb));
-            dvdatmp          = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_msub_pd(velec,rinv00,fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _mm_load_pd( vftab + _mm_extract_epi32(vfitab,0) );
-            F                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( vftab + _mm_extract_epi32(vfitab,0) +2);
-            H                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Fp               = _mm_macc_pd(vfeps,_mm_macc_pd(H,vfeps,G),F);
-            VV               = _mm_macc_pd(vfeps,Fp,Y);
-            vvdw6            = _mm_mul_pd(c6_00,VV);
-            FF               = _mm_macc_pd(vfeps,_mm_macc_pd(twovfeps,H,G),Fp);
-            fvdw6            = _mm_mul_pd(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            vfitab           = _mm_add_epi32(vfitab,ifour);
-            Y                = _mm_load_pd( vftab + _mm_extract_epi32(vfitab,0) );
-            F                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( vftab + _mm_extract_epi32(vfitab,0) +2);
-            H                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Fp               = _mm_macc_pd(vfeps,_mm_macc_pd(H,vfeps,G),F);
-            VV               = _mm_macc_pd(vfeps,Fp,Y);
-            vvdw12           = _mm_mul_pd(c12_00,VV);
-            FF               = _mm_macc_pd(vfeps,_mm_macc_pd(twovfeps,H,G),Fp);
-            fvdw12           = _mm_mul_pd(c12_00,FF);
-            vvdw             = _mm_add_pd(vvdw12,vvdw6);
-            fvdw             = _mm_xor_pd(signbit,_mm_mul_pd(_mm_add_pd(fvdw6,fvdw12),_mm_mul_pd(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
-            velecsum         = _mm_add_pd(velecsum,velec);
-            vgb              = _mm_unpacklo_pd(vgb,_mm_setzero_pd());
-            vgbsum           = _mm_add_pd(vgbsum,vgb);
-            vvdw             = _mm_unpacklo_pd(vvdw,_mm_setzero_pd());
-            vvdwsum          = _mm_add_pd(vvdwsum,vvdw);
-
-            fscal            = _mm_add_pd(felec,fvdw);
-
-            fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
-
-            /* Update vectorial force */
-            fix0             = _mm_macc_pd(dx00,fscal,fix0);
-            fiy0             = _mm_macc_pd(dy00,fscal,fiy0);
-            fiz0             = _mm_macc_pd(dz00,fscal,fiz0);
-            
-            gmx_mm_decrement_1rvec_1ptr_swizzle_pd(f+j_coord_offsetA,
-                                                   _mm_mul_pd(dx00,fscal),
-                                                   _mm_mul_pd(dy00,fscal),
-                                                   _mm_mul_pd(dz00,fscal));
-
-            /* Inner loop uses 95 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_pd(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_mm_update_1pot_pd(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_mm_update_1pot_pd(vgbsum,kernel_data->energygrp_polarization+ggid);
-        gmx_mm_update_1pot_pd(vvdwsum,kernel_data->energygrp_vdw+ggid);
-        dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai0,isai0));
-        gmx_mm_update_1pot_pd(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 10 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*95);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_128_fma_double
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_128_fma_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    __m128d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    __m128d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128d          velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128d          vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp;
-    __m128d          minushalf = _mm_set1_pd(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    __m128d          rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    __m128d          one_sixth   = _mm_set1_pd(1.0/6.0);
-    __m128d          one_twelfth = _mm_set1_pd(1.0/12.0);
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128d          rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    __m128d          dummy_mask,cutoff_mask;
-    __m128d          signbit   = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
-    __m128d          one     = _mm_set1_pd(1.0);
-    __m128d          two     = _mm_set1_pd(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_pd(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = _mm_set1_pd(kernel_data->table_vdw->scale);
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_pd(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm_setzero_pd();
-        fiy0             = _mm_setzero_pd();
-        fiz0             = _mm_setzero_pd();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_pd(facel,_mm_load1_pd(charge+inr+0));
-        isai0            = _mm_load1_pd(invsqrta+inr+0);
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        dvdasum          = _mm_setzero_pd();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_2ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = avx128fma_invsqrt_d(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_2real_swizzle_pd(charge+jnrA+0,charge+jnrB+0);
-            isaj0            = gmx_mm_load_2real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-            gmx_mm_load_2pair_swizzle_pd(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _mm_mul_pd(r00,vftabscale);
-            vfitab           = _mm_cvttpd_epi32(rt);
-#ifdef __XOP__
-            vfeps            = _mm_frcz_pd(rt);
-#else
-            vfeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-#endif
-            twovfeps         = _mm_add_pd(vfeps,vfeps);
-            vfitab           = _mm_slli_epi32(vfitab,3);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-#ifdef __XOP__
-            gbeps            = _mm_frcz_pd(rt);
-#else
-            gbeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-#endif
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,1) );
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,1) +2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Fp               = _mm_macc_pd(gbeps,_mm_macc_pd(gbeps,H,G),F);
-            VV               = _mm_macc_pd(gbeps,Fp,Y);
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            twogbeps         = _mm_add_pd(gbeps,gbeps);
-            FF               = _mm_macc_pd(_mm_macc_pd(twogbeps,H,G),gbeps,Fp);
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_macc_pd(fgb,r00,vgb));
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_2real_swizzle_pd(dvda+jnrA,dvda+jnrB,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_msub_pd(velec,rinv00,fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _mm_load_pd( vftab + _mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_pd( vftab + _mm_extract_epi32(vfitab,1) );
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( vftab + _mm_extract_epi32(vfitab,0) +2);
-            H                = _mm_load_pd( vftab + _mm_extract_epi32(vfitab,1) +2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Fp               = _mm_macc_pd(vfeps,_mm_macc_pd(H,vfeps,G),F);
-            FF               = _mm_macc_pd(vfeps,_mm_macc_pd(twovfeps,H,G),Fp);
-            fvdw6            = _mm_mul_pd(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            vfitab           = _mm_add_epi32(vfitab,ifour);
-            Y                = _mm_load_pd( vftab + _mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_pd( vftab + _mm_extract_epi32(vfitab,1) );
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( vftab + _mm_extract_epi32(vfitab,0) +2);
-            H                = _mm_load_pd( vftab + _mm_extract_epi32(vfitab,1) +2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Fp               = _mm_macc_pd(vfeps,_mm_macc_pd(H,vfeps,G),F);
-            FF               = _mm_macc_pd(vfeps,_mm_macc_pd(twovfeps,H,G),Fp);
-            fvdw12           = _mm_mul_pd(c12_00,FF);
-            fvdw             = _mm_xor_pd(signbit,_mm_mul_pd(_mm_add_pd(fvdw6,fvdw12),_mm_mul_pd(vftabscale,rinv00)));
-
-            fscal            = _mm_add_pd(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _mm_macc_pd(dx00,fscal,fix0);
-            fiy0             = _mm_macc_pd(dy00,fscal,fiy0);
-            fiz0             = _mm_macc_pd(dz00,fscal,fiz0);
-            
-            gmx_mm_decrement_1rvec_2ptr_swizzle_pd(f+j_coord_offsetA,f+j_coord_offsetB,
-                                                   _mm_mul_pd(dx00,fscal),
-                                                   _mm_mul_pd(dy00,fscal),
-                                                   _mm_mul_pd(dz00,fscal));
-
-            /* Inner loop uses 85 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_1ptr_swizzle_pd(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = avx128fma_invsqrt_d(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = _mm_load_sd(charge+jnrA+0);
-            isaj0            = _mm_load_sd(invsqrta+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-            gmx_mm_load_1pair_swizzle_pd(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _mm_mul_pd(r00,vftabscale);
-            vfitab           = _mm_cvttpd_epi32(rt);
-#ifdef __XOP__
-            vfeps            = _mm_frcz_pd(rt);
-#else
-            vfeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-#endif
-            twovfeps         = _mm_add_pd(vfeps,vfeps);
-            vfitab           = _mm_slli_epi32(vfitab,3);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-#ifdef __XOP__
-            gbeps            = _mm_frcz_pd(rt);
-#else
-            gbeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-#endif
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Fp               = _mm_macc_pd(gbeps,_mm_macc_pd(gbeps,H,G),F);
-            VV               = _mm_macc_pd(gbeps,Fp,Y);
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            twogbeps         = _mm_add_pd(gbeps,gbeps);
-            FF               = _mm_macc_pd(_mm_macc_pd(twogbeps,H,G),gbeps,Fp);
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_macc_pd(fgb,r00,vgb));
-            dvdatmp          = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_msub_pd(velec,rinv00,fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _mm_load_pd( vftab + _mm_extract_epi32(vfitab,0) );
-            F                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( vftab + _mm_extract_epi32(vfitab,0) +2);
-            H                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Fp               = _mm_macc_pd(vfeps,_mm_macc_pd(H,vfeps,G),F);
-            FF               = _mm_macc_pd(vfeps,_mm_macc_pd(twovfeps,H,G),Fp);
-            fvdw6            = _mm_mul_pd(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            vfitab           = _mm_add_epi32(vfitab,ifour);
-            Y                = _mm_load_pd( vftab + _mm_extract_epi32(vfitab,0) );
-            F                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( vftab + _mm_extract_epi32(vfitab,0) +2);
-            H                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Fp               = _mm_macc_pd(vfeps,_mm_macc_pd(H,vfeps,G),F);
-            FF               = _mm_macc_pd(vfeps,_mm_macc_pd(twovfeps,H,G),Fp);
-            fvdw12           = _mm_mul_pd(c12_00,FF);
-            fvdw             = _mm_xor_pd(signbit,_mm_mul_pd(_mm_add_pd(fvdw6,fvdw12),_mm_mul_pd(vftabscale,rinv00)));
-
-            fscal            = _mm_add_pd(felec,fvdw);
-
-            fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
-
-            /* Update vectorial force */
-            fix0             = _mm_macc_pd(dx00,fscal,fix0);
-            fiy0             = _mm_macc_pd(dy00,fscal,fiy0);
-            fiz0             = _mm_macc_pd(dz00,fscal,fiz0);
-            
-            gmx_mm_decrement_1rvec_1ptr_swizzle_pd(f+j_coord_offsetA,
-                                                   _mm_mul_pd(dx00,fscal),
-                                                   _mm_mul_pd(dy00,fscal),
-                                                   _mm_mul_pd(dz00,fscal));
-
-            /* Inner loop uses 85 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_pd(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai0,isai0));
-        gmx_mm_update_1pot_pd(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*85);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_128_fma_double.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_128_fma_double.c
deleted file mode 100644
index 477ee06cbd..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_128_fma_double.c
+++ /dev/null
@@ -1,736 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS avx_128_fma_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_x86_avx_128_fma_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_128_fma_double
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_128_fma_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    __m128d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    __m128d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128d          velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128d          vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp;
-    __m128d          minushalf = _mm_set1_pd(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    __m128d          rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    __m128d          one_sixth   = _mm_set1_pd(1.0/6.0);
-    __m128d          one_twelfth = _mm_set1_pd(1.0/12.0);
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128d          rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    __m128d          dummy_mask,cutoff_mask;
-    __m128d          signbit   = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
-    __m128d          one     = _mm_set1_pd(1.0);
-    __m128d          two     = _mm_set1_pd(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_pd(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_pd(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm_setzero_pd();
-        fiy0             = _mm_setzero_pd();
-        fiz0             = _mm_setzero_pd();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_pd(facel,_mm_load1_pd(charge+inr+0));
-        isai0            = _mm_load1_pd(invsqrta+inr+0);
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _mm_setzero_pd();
-        vgbsum           = _mm_setzero_pd();
-        vvdwsum          = _mm_setzero_pd();
-        dvdasum          = _mm_setzero_pd();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_2ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = avx128fma_invsqrt_d(rsq00);
-
-            rinvsq00         = _mm_mul_pd(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_2real_swizzle_pd(charge+jnrA+0,charge+jnrB+0);
-            isaj0            = gmx_mm_load_2real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-            gmx_mm_load_2pair_swizzle_pd(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-#ifdef __XOP__
-            gbeps            = _mm_frcz_pd(rt);
-#else
-            gbeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-#endif
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,1) );
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,1) +2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Fp               = _mm_macc_pd(gbeps,_mm_macc_pd(gbeps,H,G),F);
-            VV               = _mm_macc_pd(gbeps,Fp,Y);
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            twogbeps         = _mm_add_pd(gbeps,gbeps);
-            FF               = _mm_macc_pd(_mm_macc_pd(twogbeps,H,G),gbeps,Fp);
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_macc_pd(fgb,r00,vgb));
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_2real_swizzle_pd(dvda+jnrA,dvda+jnrB,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_msub_pd(velec,rinv00,fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _mm_mul_pd(c6_00,rinvsix);
-            vvdw12           = _mm_mul_pd(c12_00,_mm_mul_pd(rinvsix,rinvsix));
-            vvdw             = _mm_msub_pd( vvdw12,one_twelfth, _mm_mul_pd(vvdw6,one_sixth) );
-            fvdw             = _mm_mul_pd(_mm_sub_pd(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _mm_add_pd(velecsum,velec);
-            vgbsum           = _mm_add_pd(vgbsum,vgb);
-            vvdwsum          = _mm_add_pd(vvdwsum,vvdw);
-
-            fscal            = _mm_add_pd(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _mm_macc_pd(dx00,fscal,fix0);
-            fiy0             = _mm_macc_pd(dy00,fscal,fiy0);
-            fiz0             = _mm_macc_pd(dz00,fscal,fiz0);
-            
-            gmx_mm_decrement_1rvec_2ptr_swizzle_pd(f+j_coord_offsetA,f+j_coord_offsetB,
-                                                   _mm_mul_pd(dx00,fscal),
-                                                   _mm_mul_pd(dy00,fscal),
-                                                   _mm_mul_pd(dz00,fscal));
-
-            /* Inner loop uses 74 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_1ptr_swizzle_pd(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = avx128fma_invsqrt_d(rsq00);
-
-            rinvsq00         = _mm_mul_pd(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _mm_load_sd(charge+jnrA+0);
-            isaj0            = _mm_load_sd(invsqrta+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-            gmx_mm_load_1pair_swizzle_pd(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-#ifdef __XOP__
-            gbeps            = _mm_frcz_pd(rt);
-#else
-            gbeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-#endif
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Fp               = _mm_macc_pd(gbeps,_mm_macc_pd(gbeps,H,G),F);
-            VV               = _mm_macc_pd(gbeps,Fp,Y);
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            twogbeps         = _mm_add_pd(gbeps,gbeps);
-            FF               = _mm_macc_pd(_mm_macc_pd(twogbeps,H,G),gbeps,Fp);
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_macc_pd(fgb,r00,vgb));
-            dvdatmp          = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_msub_pd(velec,rinv00,fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _mm_mul_pd(c6_00,rinvsix);
-            vvdw12           = _mm_mul_pd(c12_00,_mm_mul_pd(rinvsix,rinvsix));
-            vvdw             = _mm_msub_pd( vvdw12,one_twelfth, _mm_mul_pd(vvdw6,one_sixth) );
-            fvdw             = _mm_mul_pd(_mm_sub_pd(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
-            velecsum         = _mm_add_pd(velecsum,velec);
-            vgb              = _mm_unpacklo_pd(vgb,_mm_setzero_pd());
-            vgbsum           = _mm_add_pd(vgbsum,vgb);
-            vvdw             = _mm_unpacklo_pd(vvdw,_mm_setzero_pd());
-            vvdwsum          = _mm_add_pd(vvdwsum,vvdw);
-
-            fscal            = _mm_add_pd(felec,fvdw);
-
-            fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
-
-            /* Update vectorial force */
-            fix0             = _mm_macc_pd(dx00,fscal,fix0);
-            fiy0             = _mm_macc_pd(dy00,fscal,fiy0);
-            fiz0             = _mm_macc_pd(dz00,fscal,fiz0);
-            
-            gmx_mm_decrement_1rvec_1ptr_swizzle_pd(f+j_coord_offsetA,
-                                                   _mm_mul_pd(dx00,fscal),
-                                                   _mm_mul_pd(dy00,fscal),
-                                                   _mm_mul_pd(dz00,fscal));
-
-            /* Inner loop uses 74 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_pd(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_mm_update_1pot_pd(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_mm_update_1pot_pd(vgbsum,kernel_data->energygrp_polarization+ggid);
-        gmx_mm_update_1pot_pd(vvdwsum,kernel_data->energygrp_vdw+ggid);
-        dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai0,isai0));
-        gmx_mm_update_1pot_pd(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 10 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*74);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_128_fma_double
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_128_fma_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    __m128d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    __m128d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128d          velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128d          vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp;
-    __m128d          minushalf = _mm_set1_pd(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    __m128d          rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    __m128d          one_sixth   = _mm_set1_pd(1.0/6.0);
-    __m128d          one_twelfth = _mm_set1_pd(1.0/12.0);
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128d          rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    __m128d          dummy_mask,cutoff_mask;
-    __m128d          signbit   = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
-    __m128d          one     = _mm_set1_pd(1.0);
-    __m128d          two     = _mm_set1_pd(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_pd(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_pd(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm_setzero_pd();
-        fiy0             = _mm_setzero_pd();
-        fiz0             = _mm_setzero_pd();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_pd(facel,_mm_load1_pd(charge+inr+0));
-        isai0            = _mm_load1_pd(invsqrta+inr+0);
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        dvdasum          = _mm_setzero_pd();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_2ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = avx128fma_invsqrt_d(rsq00);
-
-            rinvsq00         = _mm_mul_pd(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_2real_swizzle_pd(charge+jnrA+0,charge+jnrB+0);
-            isaj0            = gmx_mm_load_2real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-            gmx_mm_load_2pair_swizzle_pd(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-#ifdef __XOP__
-            gbeps            = _mm_frcz_pd(rt);
-#else
-            gbeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-#endif
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,1) );
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,1) +2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Fp               = _mm_macc_pd(gbeps,_mm_macc_pd(gbeps,H,G),F);
-            VV               = _mm_macc_pd(gbeps,Fp,Y);
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            twogbeps         = _mm_add_pd(gbeps,gbeps);
-            FF               = _mm_macc_pd(_mm_macc_pd(twogbeps,H,G),gbeps,Fp);
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_macc_pd(fgb,r00,vgb));
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_2real_swizzle_pd(dvda+jnrA,dvda+jnrB,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_msub_pd(velec,rinv00,fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _mm_mul_pd(_mm_msub_pd(c12_00,rinvsix,c6_00),_mm_mul_pd(rinvsix,rinvsq00));
-
-            fscal            = _mm_add_pd(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _mm_macc_pd(dx00,fscal,fix0);
-            fiy0             = _mm_macc_pd(dy00,fscal,fiy0);
-            fiz0             = _mm_macc_pd(dz00,fscal,fiz0);
-            
-            gmx_mm_decrement_1rvec_2ptr_swizzle_pd(f+j_coord_offsetA,f+j_coord_offsetB,
-                                                   _mm_mul_pd(dx00,fscal),
-                                                   _mm_mul_pd(dy00,fscal),
-                                                   _mm_mul_pd(dz00,fscal));
-
-            /* Inner loop uses 67 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_1ptr_swizzle_pd(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = avx128fma_invsqrt_d(rsq00);
-
-            rinvsq00         = _mm_mul_pd(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _mm_load_sd(charge+jnrA+0);
-            isaj0            = _mm_load_sd(invsqrta+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-            gmx_mm_load_1pair_swizzle_pd(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-#ifdef __XOP__
-            gbeps            = _mm_frcz_pd(rt);
-#else
-            gbeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-#endif
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Fp               = _mm_macc_pd(gbeps,_mm_macc_pd(gbeps,H,G),F);
-            VV               = _mm_macc_pd(gbeps,Fp,Y);
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            twogbeps         = _mm_add_pd(gbeps,gbeps);
-            FF               = _mm_macc_pd(_mm_macc_pd(twogbeps,H,G),gbeps,Fp);
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_macc_pd(fgb,r00,vgb));
-            dvdatmp          = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_msub_pd(velec,rinv00,fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _mm_mul_pd(_mm_msub_pd(c12_00,rinvsix,c6_00),_mm_mul_pd(rinvsix,rinvsq00));
-
-            fscal            = _mm_add_pd(felec,fvdw);
-
-            fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
-
-            /* Update vectorial force */
-            fix0             = _mm_macc_pd(dx00,fscal,fix0);
-            fiy0             = _mm_macc_pd(dy00,fscal,fiy0);
-            fiz0             = _mm_macc_pd(dz00,fscal,fiz0);
-            
-            gmx_mm_decrement_1rvec_1ptr_swizzle_pd(f+j_coord_offsetA,
-                                                   _mm_mul_pd(dx00,fscal),
-                                                   _mm_mul_pd(dy00,fscal),
-                                                   _mm_mul_pd(dz00,fscal));
-
-            /* Inner loop uses 67 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_pd(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai0,isai0));
-        gmx_mm_update_1pot_pd(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*67);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_128_fma_double.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_128_fma_double.c
deleted file mode 100644
index 7d3a9fdd4a..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_128_fma_double.c
+++ /dev/null
@@ -1,665 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS avx_128_fma_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_x86_avx_128_fma_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_128_fma_double
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_128_fma_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    __m128d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    __m128d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128d          velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128d          vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp;
-    __m128d          minushalf = _mm_set1_pd(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128d          rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    __m128d          dummy_mask,cutoff_mask;
-    __m128d          signbit   = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
-    __m128d          one     = _mm_set1_pd(1.0);
-    __m128d          two     = _mm_set1_pd(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_pd(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_pd(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm_setzero_pd();
-        fiy0             = _mm_setzero_pd();
-        fiz0             = _mm_setzero_pd();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_pd(facel,_mm_load1_pd(charge+inr+0));
-        isai0            = _mm_load1_pd(invsqrta+inr+0);
-
-        /* Reset potential sums */
-        velecsum         = _mm_setzero_pd();
-        vgbsum           = _mm_setzero_pd();
-        dvdasum          = _mm_setzero_pd();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_2ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = avx128fma_invsqrt_d(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_2real_swizzle_pd(charge+jnrA+0,charge+jnrB+0);
-            isaj0            = gmx_mm_load_2real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-#ifdef __XOP__
-            gbeps            = _mm_frcz_pd(rt);
-#else
-            gbeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-#endif
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,1) );
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,1) +2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Fp               = _mm_macc_pd(gbeps,_mm_macc_pd(gbeps,H,G),F);
-            VV               = _mm_macc_pd(gbeps,Fp,Y);
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            twogbeps         = _mm_add_pd(gbeps,gbeps);
-            FF               = _mm_macc_pd(_mm_macc_pd(twogbeps,H,G),gbeps,Fp);
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_macc_pd(fgb,r00,vgb));
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_2real_swizzle_pd(dvda+jnrA,dvda+jnrB,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_msub_pd(velec,rinv00,fgb),rinv00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _mm_add_pd(velecsum,velec);
-            vgbsum           = _mm_add_pd(vgbsum,vgb);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _mm_macc_pd(dx00,fscal,fix0);
-            fiy0             = _mm_macc_pd(dy00,fscal,fiy0);
-            fiz0             = _mm_macc_pd(dz00,fscal,fiz0);
-            
-            gmx_mm_decrement_1rvec_2ptr_swizzle_pd(f+j_coord_offsetA,f+j_coord_offsetB,
-                                                   _mm_mul_pd(dx00,fscal),
-                                                   _mm_mul_pd(dy00,fscal),
-                                                   _mm_mul_pd(dz00,fscal));
-
-            /* Inner loop uses 61 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_1ptr_swizzle_pd(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = avx128fma_invsqrt_d(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = _mm_load_sd(charge+jnrA+0);
-            isaj0            = _mm_load_sd(invsqrta+jnrA+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-#ifdef __XOP__
-            gbeps            = _mm_frcz_pd(rt);
-#else
-            gbeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-#endif
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Fp               = _mm_macc_pd(gbeps,_mm_macc_pd(gbeps,H,G),F);
-            VV               = _mm_macc_pd(gbeps,Fp,Y);
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            twogbeps         = _mm_add_pd(gbeps,gbeps);
-            FF               = _mm_macc_pd(_mm_macc_pd(twogbeps,H,G),gbeps,Fp);
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_macc_pd(fgb,r00,vgb));
-            dvdatmp          = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_msub_pd(velec,rinv00,fgb),rinv00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
-            velecsum         = _mm_add_pd(velecsum,velec);
-            vgb              = _mm_unpacklo_pd(vgb,_mm_setzero_pd());
-            vgbsum           = _mm_add_pd(vgbsum,vgb);
-
-            fscal            = felec;
-
-            fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
-
-            /* Update vectorial force */
-            fix0             = _mm_macc_pd(dx00,fscal,fix0);
-            fiy0             = _mm_macc_pd(dy00,fscal,fiy0);
-            fiz0             = _mm_macc_pd(dz00,fscal,fiz0);
-            
-            gmx_mm_decrement_1rvec_1ptr_swizzle_pd(f+j_coord_offsetA,
-                                                   _mm_mul_pd(dx00,fscal),
-                                                   _mm_mul_pd(dy00,fscal),
-                                                   _mm_mul_pd(dz00,fscal));
-
-            /* Inner loop uses 61 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_pd(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_mm_update_1pot_pd(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_mm_update_1pot_pd(vgbsum,kernel_data->energygrp_polarization+ggid);
-        dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai0,isai0));
-        gmx_mm_update_1pot_pd(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 9 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*9 + inneriter*61);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_128_fma_double
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_128_fma_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    __m128d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    __m128d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128d          velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128d          vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp;
-    __m128d          minushalf = _mm_set1_pd(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128d          rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    __m128d          dummy_mask,cutoff_mask;
-    __m128d          signbit   = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
-    __m128d          one     = _mm_set1_pd(1.0);
-    __m128d          two     = _mm_set1_pd(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_pd(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_pd(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm_setzero_pd();
-        fiy0             = _mm_setzero_pd();
-        fiz0             = _mm_setzero_pd();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_pd(facel,_mm_load1_pd(charge+inr+0));
-        isai0            = _mm_load1_pd(invsqrta+inr+0);
-
-        dvdasum          = _mm_setzero_pd();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_2ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = avx128fma_invsqrt_d(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_2real_swizzle_pd(charge+jnrA+0,charge+jnrB+0);
-            isaj0            = gmx_mm_load_2real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-#ifdef __XOP__
-            gbeps            = _mm_frcz_pd(rt);
-#else
-            gbeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-#endif
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,1) );
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,1) +2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Fp               = _mm_macc_pd(gbeps,_mm_macc_pd(gbeps,H,G),F);
-            VV               = _mm_macc_pd(gbeps,Fp,Y);
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            twogbeps         = _mm_add_pd(gbeps,gbeps);
-            FF               = _mm_macc_pd(_mm_macc_pd(twogbeps,H,G),gbeps,Fp);
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_macc_pd(fgb,r00,vgb));
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_2real_swizzle_pd(dvda+jnrA,dvda+jnrB,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_msub_pd(velec,rinv00,fgb),rinv00);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _mm_macc_pd(dx00,fscal,fix0);
-            fiy0             = _mm_macc_pd(dy00,fscal,fiy0);
-            fiz0             = _mm_macc_pd(dz00,fscal,fiz0);
-            
-            gmx_mm_decrement_1rvec_2ptr_swizzle_pd(f+j_coord_offsetA,f+j_coord_offsetB,
-                                                   _mm_mul_pd(dx00,fscal),
-                                                   _mm_mul_pd(dy00,fscal),
-                                                   _mm_mul_pd(dz00,fscal));
-
-            /* Inner loop uses 59 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_1ptr_swizzle_pd(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = avx128fma_invsqrt_d(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = _mm_load_sd(charge+jnrA+0);
-            isaj0            = _mm_load_sd(invsqrta+jnrA+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-#ifdef __XOP__
-            gbeps            = _mm_frcz_pd(rt);
-#else
-            gbeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-#endif
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Fp               = _mm_macc_pd(gbeps,_mm_macc_pd(gbeps,H,G),F);
-            VV               = _mm_macc_pd(gbeps,Fp,Y);
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            twogbeps         = _mm_add_pd(gbeps,gbeps);
-            FF               = _mm_macc_pd(_mm_macc_pd(twogbeps,H,G),gbeps,Fp);
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_macc_pd(fgb,r00,vgb));
-            dvdatmp          = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_msub_pd(velec,rinv00,fgb),rinv00);
-
-            fscal            = felec;
-
-            fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
-
-            /* Update vectorial force */
-            fix0             = _mm_macc_pd(dx00,fscal,fix0);
-            fiy0             = _mm_macc_pd(dy00,fscal,fiy0);
-            fiz0             = _mm_macc_pd(dz00,fscal,fiz0);
-            
-            gmx_mm_decrement_1rvec_1ptr_swizzle_pd(f+j_coord_offsetA,
-                                                   _mm_mul_pd(dx00,fscal),
-                                                   _mm_mul_pd(dy00,fscal),
-                                                   _mm_mul_pd(dz00,fscal));
-
-            /* Inner loop uses 59 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_pd(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai0,isai0));
-        gmx_mm_update_1pot_pd(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*7 + inneriter*59);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_avx_128_fma_double.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_avx_128_fma_double.c
index 224f4a4bd3..60775ca2e4 100644
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_avx_128_fma_double.c
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_avx_128_fma_double.c
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -51,6 +51,36 @@ nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_avx_128_fma_double;
 nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_128_fma_double;
 nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_128_fma_double;
 nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_128_fma_double;
 nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_avx_128_fma_double;
 nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_avx_128_fma_double;
 nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_avx_128_fma_double;
@@ -141,72 +171,6 @@ nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_avx_128_fma_double;
 nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_128_fma_double;
 nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_128_fma_double;
 nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_128_fma_double;
-nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_128_fma_double;
 nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_128_fma_double;
 nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_128_fma_double;
 nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_128_fma_double;
@@ -277,6 +241,36 @@ nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_128_fma_double;
 nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_128_fma_double;
 nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_128_fma_double;
 nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_128_fma_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_128_fma_double;
 
 
 nb_kernel_info_t
@@ -294,6 +288,36 @@ nb_kernel_info_t
     { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
     { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
     { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "PotentialAndForce" },
     { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "LJEwald", "None", "Water3Particle", "", "PotentialAndForce" },
@@ -384,72 +408,6 @@ nb_kernel_info_t
     { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "Force" },
     { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "PotentialAndForce" },
     { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
     { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
@@ -519,7 +477,37 @@ nb_kernel_info_t
     { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
     { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
     { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }
+    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }
 };
 
 int
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_template_avx_128_fma_double.pre b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_template_avx_128_fma_double.pre
index 0bbbec2976..487ed3b6ff 100644
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_template_avx_128_fma_double.pre
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_template_avx_128_fma_double.pre
@@ -2,7 +2,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -128,12 +128,6 @@ void
     __m128d          velec,felec,velecsum,facel,crf,krf,krf2;
     real             *charge;
     /* #endif */
-    /* #if 'GeneralizedBorn' in KERNEL_ELEC */
-    __m128i          gbitab;
-    __m128d          vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp;
-    __m128d          minushalf = _mm_set1_pd(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    /* #endif */
     /* #if KERNEL_VDW != 'None' */
     int              nvdwtype;
     __m128d          rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
@@ -142,7 +136,7 @@ void
     __m128d          one_sixth   = _mm_set1_pd(1.0/6.0);
     __m128d          one_twelfth = _mm_set1_pd(1.0/12.0);
     /* #endif */
-    /* #if 'Table' in KERNEL_ELEC or 'GeneralizedBorn' in KERNEL_ELEC or 'Table' in KERNEL_VDW */
+    /* #if 'Table' in KERNEL_ELEC or 'Table' in KERNEL_VDW */
     __m128i          vfitab;
     __m128i          ifour       = _mm_set1_epi32(4);
     __m128d          rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
@@ -226,14 +220,6 @@ void
      /*     #endif */
     /* #endif */
 
-    /* #if KERNEL_ELEC=='GeneralizedBorn' */
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_pd(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-    /* #endif */
-
     /* #if 'Water' in GEOMETRY_I */
     /* Setup water-specific parameters */
     inr              = nlist->iinr[0];
@@ -360,9 +346,6 @@ void
         /*     #for I in PARTICLES_ELEC_I */
         iq{I}              = _mm_mul_pd(facel,_mm_load1_pd(charge+inr+{I}));
         /*         #define OUTERFLOPS OUTERFLOPS+1 */
-        /*         #if KERNEL_ELEC=='GeneralizedBorn' */
-        isai{I}            = _mm_load1_pd(invsqrta+inr+{I});
-        /*         #endif */
         /*     #endfor */
         /*     #for I in PARTICLES_VDW_I */
         vdwioffset{I}      = 2*nvdwtype*vdwtype[inr+{I}];
@@ -374,16 +357,10 @@ void
         /*     #if KERNEL_ELEC != 'None' */
         velecsum         = _mm_setzero_pd();
         /*     #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC */
-        vgbsum           = _mm_setzero_pd();
-        /*     #endif */
         /*     #if KERNEL_VDW != 'None' */
         vvdwsum          = _mm_setzero_pd();
         /*     #endif */
         /* #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */
-        dvdasum          = _mm_setzero_pd();
-        /*     #endif */
 
         /* #for ROUND in ['Loop','Epilogue'] */
 
@@ -490,13 +467,6 @@ void
             /*         #else */
             jq{J}              = _mm_load_sd(charge+jnrA+{J});
             /*         #endif */
-            /*         #if KERNEL_ELEC=='GeneralizedBorn' */
-            /*             #if ROUND =='Loop' */
-            isaj{J}            = gmx_mm_load_2real_swizzle_pd(invsqrta+jnrA+{J},invsqrta+jnrB+{J});
-            /*             #else */
-            isaj{J}            = _mm_load_sd(invsqrta+jnrA+{J});
-            /*             #endif */
-            /*         #endif */
             /*     #endfor */
             /*     #for J in PARTICLES_VDW_J */
             vdwjidx{J}A        = 2*vdwtype[jnrA+{J}];
@@ -607,68 +577,6 @@ void
             /*                 #define INNERFLOPS INNERFLOPS+3 */
             /*             #endif */
 
-            /*         #elif KERNEL_ELEC=='GeneralizedBorn' */
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai{I},isaj{J});
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq{I}{J},_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-            /*             #define INNERFLOPS INNERFLOPS+5 */
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r{I}{J},gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-#ifdef __XOP__
-            gbeps            = _mm_frcz_pd(rt);
-#else
-            gbeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-#endif
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,0) );
-            /*             #if ROUND == 'Loop' */
-            F                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,1) );
-            /*             #else */
-            F                = _mm_setzero_pd();
-            /*             #endif */
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,0) +2);
-            /*             #if ROUND == 'Loop' */
-            H                = _mm_load_pd( gbtab + _mm_extract_epi32(gbitab,1) +2);
-            /*             #else */
-            H                = _mm_setzero_pd();
-            /*             #endif */
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Fp               = _mm_macc_pd(gbeps,_mm_macc_pd(gbeps,H,G),F);
-            VV               = _mm_macc_pd(gbeps,Fp,Y);
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-            /*             #define INNERFLOPS INNERFLOPS+10 */
-
-            /*             #if 'Force' in KERNEL_VF */
-            twogbeps         = _mm_add_pd(gbeps,gbeps);
-            FF               = _mm_macc_pd(_mm_macc_pd(twogbeps,H,G),gbeps,Fp);
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_macc_pd(fgb,r{I}{J},vgb));
-            /*                 #if ROUND == 'Epilogue' */
-            dvdatmp          = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
-            /*                 #endif */
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            /*             #if ROUND == 'Loop' */
-            gmx_mm_increment_2real_swizzle_pd(dvda+jnrA,dvda+jnrB,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj{J},isaj{J})));
-            /*             #else */
-            gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj{J},isaj{J})));
-            /*             #endif */
-            /*                 #define INNERFLOPS INNERFLOPS+13 */
-            /*             #endif */
-            velec            = _mm_mul_pd(qq{I}{J},rinv{I}{J});
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #if 'Force' in KERNEL_VF */
-            felec            = _mm_mul_pd(_mm_msub_pd(velec,rinv{I}{J},fgb),rinv{I}{J});
-            /*                 #define INNERFLOPS INNERFLOPS+3 */
-            /*             #endif */
-
             /*         #elif KERNEL_ELEC=='Ewald' */
             /* EWALD ELECTROSTATICS */
 
@@ -955,17 +863,6 @@ void
             /*             #endif */
             velecsum         = _mm_add_pd(velecsum,velec);
             /*             #define INNERFLOPS INNERFLOPS+1 */
-            /*             #if KERNEL_ELEC=='GeneralizedBorn' */
-            /*             #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */
-            vgb              = _mm_and_pd(vgb,cutoff_mask);
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #endif                                       */
-            /*             #if ROUND == 'Epilogue' */
-            vgb              = _mm_unpacklo_pd(vgb,_mm_setzero_pd());
-            /*             #endif */
-            vgbsum           = _mm_add_pd(vgbsum,vgb);
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #endif */
             /*         #endif */
             /*         #if 'vdw' in INTERACTION_FLAGS[I][J] */
             /*             #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */
@@ -1110,19 +1007,11 @@ void
         gmx_mm_update_1pot_pd(velecsum,kernel_data->energygrp_elec+ggid);
         /*         #define OUTERFLOPS OUTERFLOPS+1 */
         /*     #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC */
-        gmx_mm_update_1pot_pd(vgbsum,kernel_data->energygrp_polarization+ggid);
-        /*         #define OUTERFLOPS OUTERFLOPS+1 */
-        /*     #endif */
         /*     #if KERNEL_VDW != 'None' */
         gmx_mm_update_1pot_pd(vvdwsum,kernel_data->energygrp_vdw+ggid);
         /*         #define OUTERFLOPS OUTERFLOPS+1 */
         /*     #endif */
         /* #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */
-        dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai{I},isai{I}));
-        gmx_mm_update_1pot_pd(dvdasum,dvda+inr);
-        /*     #endif */
 
         /* Increment number of inner iterations */
         inneriter                  += j_index_end - j_index_start;
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/make_nb_kernel_avx_128_fma_single.py b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/make_nb_kernel_avx_128_fma_single.py
index 7f264cad92..a80e244a54 100755
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/make_nb_kernel_avx_128_fma_single.py
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/make_nb_kernel_avx_128_fma_single.py
@@ -2,7 +2,7 @@
 #
 # This file is part of the GROMACS molecular simulation package.
 #
-# Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
+# Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 # and including many others, as listed in the AUTHORS file in the
 # top-level source directory and at http://www.gromacs.org.
@@ -105,7 +105,6 @@ ElectrostaticsList = {
     'None'                    : [],
     'Coulomb'                 : ['rinv','rinvsq'],
     'ReactionField'           : ['rinv','rinvsq'],
-    'GeneralizedBorn'         : ['rinv','r'],
     'CubicSplineTable'        : ['rinv','r','table'],
     'Ewald'                   : ['rinv','rinvsq','r'],
 }
@@ -190,7 +189,6 @@ Abbreviation = {
     'Coulomb'                 : 'Coul',
     'Ewald'                   : 'Ew',
     'ReactionField'           : 'RF',
-    'GeneralizedBorn'         : 'GB',
     'CubicSplineTable'        : 'CSTab',
     'LennardJones'            : 'LJ',
     'Buckingham'              : 'Bham',
@@ -269,7 +267,7 @@ def KeepKernel(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,Kernel
         return 0
 
-    # No need for LJ-only water optimization, or water optimization with implicit solvent.
-    if('Water' in KernelGeom[0] and (KernelElec=='None' or 'GeneralizedBorn' in KernelElec)):
+    # No need for LJ-only water optimization.
+    if('Water' in KernelGeom[0] and KernelElec=='None'):
         return 0
 
     # Non-matching table settings are pointless
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_128_fma_single.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_128_fma_single.c
deleted file mode 100644
index 1e7160bc96..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_128_fma_single.c
+++ /dev/null
@@ -1,978 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS avx_128_fma_single kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_x86_avx_128_fma_single.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_128_fma_single
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_128_fma_single
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B,C,D refer to j loop unrolling done with AVX_128, e.g. for the four different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB,jnrC,jnrD;
-    int              jnrlistA,jnrlistB,jnrlistC,jnrlistD;
-    int              j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    real             *fjptrA,*fjptrB,*fjptrC,*fjptrD;
-    real             scratch[4*DIM];
-    __m128           fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128           ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D;
-    __m128           jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128           dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128           velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128           vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,twogbeps,dvdatmp;
-    __m128           minushalf = _mm_set1_ps(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    __m128           rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    __m128           one_sixth   = _mm_set1_ps(1.0/6.0);
-    __m128           one_twelfth = _mm_set1_ps(1.0/12.0);
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128           rt,vfeps,twovfeps,vftabscale,Y,F,G,H,Fp,VV,FF;
-    real             *vftab;
-    __m128           dummy_mask,cutoff_mask;
-    __m128           signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) );
-    __m128           one     = _mm_set1_ps(1.0);
-    __m128           two     = _mm_set1_ps(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_ps(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = _mm_set1_ps(kernel_data->table_vdw->scale);
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_ps(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = jnrC = jnrD = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-    j_coord_offsetC = 0;
-    j_coord_offsetD = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    for(iidx=0;iidx<4*DIM;iidx++)
-    {
-        scratch[iidx] = 0.0;
-    }
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_ps(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm_setzero_ps();
-        fiy0             = _mm_setzero_ps();
-        fiz0             = _mm_setzero_ps();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_ps(facel,_mm_load1_ps(charge+inr+0));
-        isai0            = _mm_load1_ps(invsqrta+inr+0);
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _mm_setzero_ps();
-        vgbsum           = _mm_setzero_ps();
-        vvdwsum          = _mm_setzero_ps();
-        dvdasum          = _mm_setzero_ps();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end && jjnr[jidx+3]>=0; jidx+=4)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            jnrC             = jjnr[jidx+2];
-            jnrD             = jjnr[jidx+3];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = avx128fma_invsqrt_f(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-            gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,
-                                         vdwparam+vdwioffset0+vdwjidx0C,
-                                         vdwparam+vdwioffset0+vdwjidx0D,
-                                         &c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _mm_mul_ps(r00,vftabscale);
-            vfitab           = _mm_cvttps_epi32(rt);
-#ifdef __XOP__
-            vfeps            = _mm_frcz_ps(rt);
-#else
-            vfeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-#endif
-            twovfeps         = _mm_add_ps(vfeps,vfeps);
-            vfitab           = _mm_slli_epi32(vfitab,3);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-#ifdef __XOP__
-            gbeps            = _mm_frcz_ps(rt);
-#else
-            gbeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-#endif
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Fp               = _mm_macc_ps(gbeps,_mm_macc_ps(gbeps,H,G),F);
-            VV               = _mm_macc_ps(gbeps,Fp,Y);
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            twogbeps         = _mm_add_ps(gbeps,gbeps);
-            FF               = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp);
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r00,vgb));
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_msub_ps(velec,rinv00,fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,1) );
-            G                = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,2) );
-            H                = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Fp               = _mm_macc_ps(vfeps,_mm_macc_ps(H,vfeps,G),F);
-            VV               = _mm_macc_ps(vfeps,Fp,Y);
-            vvdw6            = _mm_mul_ps(c6_00,VV);
-            FF               = _mm_macc_ps(vfeps,_mm_macc_ps(twovfeps,H,G),Fp);
-            fvdw6            = _mm_mul_ps(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            vfitab           = _mm_add_epi32(vfitab,ifour);
-            Y                = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,1) );
-            G                = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,2) );
-            H                = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Fp               = _mm_macc_ps(vfeps,_mm_macc_ps(H,vfeps,G),F);
-            VV               = _mm_macc_ps(vfeps,Fp,Y);
-            vvdw12           = _mm_mul_ps(c12_00,VV);
-            FF               = _mm_macc_ps(vfeps,_mm_macc_ps(twovfeps,H,G),Fp);
-            fvdw12           = _mm_mul_ps(c12_00,FF);
-            vvdw             = _mm_add_ps(vvdw12,vvdw6);
-            fvdw             = _mm_xor_ps(signbit,_mm_mul_ps(_mm_add_ps(fvdw6,fvdw12),_mm_mul_ps(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _mm_add_ps(velecsum,velec);
-            vgbsum           = _mm_add_ps(vgbsum,vgb);
-            vvdwsum          = _mm_add_ps(vvdwsum,vvdw);
-
-            fscal            = _mm_add_ps(felec,fvdw);
-
-             /* Update vectorial force */
-            fix0             = _mm_macc_ps(dx00,fscal,fix0);
-            fiy0             = _mm_macc_ps(dy00,fscal,fiy0);
-            fiz0             = _mm_macc_ps(dz00,fscal,fiz0);
-
-            fjptrA             = f+j_coord_offsetA;
-            fjptrB             = f+j_coord_offsetB;
-            fjptrC             = f+j_coord_offsetC;
-            fjptrD             = f+j_coord_offsetD;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
-                                                   _mm_mul_ps(dx00,fscal),
-                                                   _mm_mul_ps(dy00,fscal),
-                                                   _mm_mul_ps(dz00,fscal));
-
-            /* Inner loop uses 95 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrlistA         = jjnr[jidx];
-            jnrlistB         = jjnr[jidx+1];
-            jnrlistC         = jjnr[jidx+2];
-            jnrlistD         = jjnr[jidx+3];
-            /* Sign of each element will be negative for non-real atoms.
-             * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
-             * so use it as val = _mm_andnot_ps(mask,val) to clear dummy entries.
-             */
-            dummy_mask = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
-            jnrA       = (jnrlistA>=0) ? jnrlistA : 0;
-            jnrB       = (jnrlistB>=0) ? jnrlistB : 0;
-            jnrC       = (jnrlistC>=0) ? jnrlistC : 0;
-            jnrD       = (jnrlistD>=0) ? jnrlistD : 0;
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = avx128fma_invsqrt_f(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-            r00              = _mm_andnot_ps(dummy_mask,r00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-            gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,
-                                         vdwparam+vdwioffset0+vdwjidx0C,
-                                         vdwparam+vdwioffset0+vdwjidx0D,
-                                         &c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _mm_mul_ps(r00,vftabscale);
-            vfitab           = _mm_cvttps_epi32(rt);
-#ifdef __XOP__
-            vfeps            = _mm_frcz_ps(rt);
-#else
-            vfeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-#endif
-            twovfeps         = _mm_add_ps(vfeps,vfeps);
-            vfitab           = _mm_slli_epi32(vfitab,3);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-#ifdef __XOP__
-            gbeps            = _mm_frcz_ps(rt);
-#else
-            gbeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-#endif
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Fp               = _mm_macc_ps(gbeps,_mm_macc_ps(gbeps,H,G),F);
-            VV               = _mm_macc_ps(gbeps,Fp,Y);
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            twogbeps         = _mm_add_ps(gbeps,gbeps);
-            FF               = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp);
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r00,vgb));
-            dvdatmp          = _mm_andnot_ps(dummy_mask,dvdatmp);
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_msub_ps(velec,rinv00,fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,1) );
-            G                = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,2) );
-            H                = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Fp               = _mm_macc_ps(vfeps,_mm_macc_ps(H,vfeps,G),F);
-            VV               = _mm_macc_ps(vfeps,Fp,Y);
-            vvdw6            = _mm_mul_ps(c6_00,VV);
-            FF               = _mm_macc_ps(vfeps,_mm_macc_ps(twovfeps,H,G),Fp);
-            fvdw6            = _mm_mul_ps(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            vfitab           = _mm_add_epi32(vfitab,ifour);
-            Y                = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,1) );
-            G                = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,2) );
-            H                = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Fp               = _mm_macc_ps(vfeps,_mm_macc_ps(H,vfeps,G),F);
-            VV               = _mm_macc_ps(vfeps,Fp,Y);
-            vvdw12           = _mm_mul_ps(c12_00,VV);
-            FF               = _mm_macc_ps(vfeps,_mm_macc_ps(twovfeps,H,G),Fp);
-            fvdw12           = _mm_mul_ps(c12_00,FF);
-            vvdw             = _mm_add_ps(vvdw12,vvdw6);
-            fvdw             = _mm_xor_ps(signbit,_mm_mul_ps(_mm_add_ps(fvdw6,fvdw12),_mm_mul_ps(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _mm_andnot_ps(dummy_mask,velec);
-            velecsum         = _mm_add_ps(velecsum,velec);
-            vgb              = _mm_andnot_ps(dummy_mask,vgb);
-            vgbsum           = _mm_add_ps(vgbsum,vgb);
-            vvdw             = _mm_andnot_ps(dummy_mask,vvdw);
-            vvdwsum          = _mm_add_ps(vvdwsum,vvdw);
-
-            fscal            = _mm_add_ps(felec,fvdw);
-
-            fscal            = _mm_andnot_ps(dummy_mask,fscal);
-
-             /* Update vectorial force */
-            fix0             = _mm_macc_ps(dx00,fscal,fix0);
-            fiy0             = _mm_macc_ps(dy00,fscal,fiy0);
-            fiz0             = _mm_macc_ps(dz00,fscal,fiz0);
-
-            fjptrA             = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
-            fjptrB             = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
-            fjptrC             = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
-            fjptrD             = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
-                                                   _mm_mul_ps(dx00,fscal),
-                                                   _mm_mul_ps(dy00,fscal),
-                                                   _mm_mul_ps(dz00,fscal));
-
-            /* Inner loop uses 96 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_mm_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_mm_update_1pot_ps(vgbsum,kernel_data->energygrp_polarization+ggid);
-        gmx_mm_update_1pot_ps(vvdwsum,kernel_data->energygrp_vdw+ggid);
-        dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0));
-        gmx_mm_update_1pot_ps(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 10 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*96);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_128_fma_single
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_128_fma_single
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B,C,D refer to j loop unrolling done with AVX_128, e.g. for the four different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB,jnrC,jnrD;
-    int              jnrlistA,jnrlistB,jnrlistC,jnrlistD;
-    int              j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    real             *fjptrA,*fjptrB,*fjptrC,*fjptrD;
-    real             scratch[4*DIM];
-    __m128           fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128           ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D;
-    __m128           jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128           dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128           velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128           vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,twogbeps,dvdatmp;
-    __m128           minushalf = _mm_set1_ps(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    __m128           rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    __m128           one_sixth   = _mm_set1_ps(1.0/6.0);
-    __m128           one_twelfth = _mm_set1_ps(1.0/12.0);
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128           rt,vfeps,twovfeps,vftabscale,Y,F,G,H,Fp,VV,FF;
-    real             *vftab;
-    __m128           dummy_mask,cutoff_mask;
-    __m128           signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) );
-    __m128           one     = _mm_set1_ps(1.0);
-    __m128           two     = _mm_set1_ps(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_ps(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = _mm_set1_ps(kernel_data->table_vdw->scale);
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_ps(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = jnrC = jnrD = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-    j_coord_offsetC = 0;
-    j_coord_offsetD = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    for(iidx=0;iidx<4*DIM;iidx++)
-    {
-        scratch[iidx] = 0.0;
-    }
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_ps(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm_setzero_ps();
-        fiy0             = _mm_setzero_ps();
-        fiz0             = _mm_setzero_ps();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_ps(facel,_mm_load1_ps(charge+inr+0));
-        isai0            = _mm_load1_ps(invsqrta+inr+0);
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        dvdasum          = _mm_setzero_ps();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end && jjnr[jidx+3]>=0; jidx+=4)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            jnrC             = jjnr[jidx+2];
-            jnrD             = jjnr[jidx+3];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = avx128fma_invsqrt_f(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-            gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,
-                                         vdwparam+vdwioffset0+vdwjidx0C,
-                                         vdwparam+vdwioffset0+vdwjidx0D,
-                                         &c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _mm_mul_ps(r00,vftabscale);
-            vfitab           = _mm_cvttps_epi32(rt);
-#ifdef __XOP__
-            vfeps            = _mm_frcz_ps(rt);
-#else
-            vfeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-#endif
-            twovfeps         = _mm_add_ps(vfeps,vfeps);
-            vfitab           = _mm_slli_epi32(vfitab,3);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-#ifdef __XOP__
-            gbeps            = _mm_frcz_ps(rt);
-#else
-            gbeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-#endif
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Fp               = _mm_macc_ps(gbeps,_mm_macc_ps(gbeps,H,G),F);
-            VV               = _mm_macc_ps(gbeps,Fp,Y);
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            twogbeps         = _mm_add_ps(gbeps,gbeps);
-            FF               = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp);
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r00,vgb));
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_msub_ps(velec,rinv00,fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,1) );
-            G                = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,2) );
-            H                = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Fp               = _mm_macc_ps(vfeps,_mm_macc_ps(H,vfeps,G),F);
-            FF               = _mm_macc_ps(vfeps,_mm_macc_ps(twovfeps,H,G),Fp);
-            fvdw6            = _mm_mul_ps(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            vfitab           = _mm_add_epi32(vfitab,ifour);
-            Y                = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,1) );
-            G                = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,2) );
-            H                = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Fp               = _mm_macc_ps(vfeps,_mm_macc_ps(H,vfeps,G),F);
-            FF               = _mm_macc_ps(vfeps,_mm_macc_ps(twovfeps,H,G),Fp);
-            fvdw12           = _mm_mul_ps(c12_00,FF);
-            fvdw             = _mm_xor_ps(signbit,_mm_mul_ps(_mm_add_ps(fvdw6,fvdw12),_mm_mul_ps(vftabscale,rinv00)));
-
-            fscal            = _mm_add_ps(felec,fvdw);
-
-             /* Update vectorial force */
-            fix0             = _mm_macc_ps(dx00,fscal,fix0);
-            fiy0             = _mm_macc_ps(dy00,fscal,fiy0);
-            fiz0             = _mm_macc_ps(dz00,fscal,fiz0);
-
-            fjptrA             = f+j_coord_offsetA;
-            fjptrB             = f+j_coord_offsetB;
-            fjptrC             = f+j_coord_offsetC;
-            fjptrD             = f+j_coord_offsetD;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
-                                                   _mm_mul_ps(dx00,fscal),
-                                                   _mm_mul_ps(dy00,fscal),
-                                                   _mm_mul_ps(dz00,fscal));
-
-            /* Inner loop uses 85 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrlistA         = jjnr[jidx];
-            jnrlistB         = jjnr[jidx+1];
-            jnrlistC         = jjnr[jidx+2];
-            jnrlistD         = jjnr[jidx+3];
-            /* Sign of each element will be negative for non-real atoms.
-             * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
-             * so use it as val = _mm_andnot_ps(mask,val) to clear dummy entries.
-             */
-            dummy_mask = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
-            jnrA       = (jnrlistA>=0) ? jnrlistA : 0;
-            jnrB       = (jnrlistB>=0) ? jnrlistB : 0;
-            jnrC       = (jnrlistC>=0) ? jnrlistC : 0;
-            jnrD       = (jnrlistD>=0) ? jnrlistD : 0;
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = avx128fma_invsqrt_f(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-            r00              = _mm_andnot_ps(dummy_mask,r00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-            gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,
-                                         vdwparam+vdwioffset0+vdwjidx0C,
-                                         vdwparam+vdwioffset0+vdwjidx0D,
-                                         &c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _mm_mul_ps(r00,vftabscale);
-            vfitab           = _mm_cvttps_epi32(rt);
-#ifdef __XOP__
-            vfeps            = _mm_frcz_ps(rt);
-#else
-            vfeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-#endif
-            twovfeps         = _mm_add_ps(vfeps,vfeps);
-            vfitab           = _mm_slli_epi32(vfitab,3);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-#ifdef __XOP__
-            gbeps            = _mm_frcz_ps(rt);
-#else
-            gbeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-#endif
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Fp               = _mm_macc_ps(gbeps,_mm_macc_ps(gbeps,H,G),F);
-            VV               = _mm_macc_ps(gbeps,Fp,Y);
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            twogbeps         = _mm_add_ps(gbeps,gbeps);
-            FF               = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp);
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r00,vgb));
-            dvdatmp          = _mm_andnot_ps(dummy_mask,dvdatmp);
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_msub_ps(velec,rinv00,fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,1) );
-            G                = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,2) );
-            H                = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Fp               = _mm_macc_ps(vfeps,_mm_macc_ps(H,vfeps,G),F);
-            FF               = _mm_macc_ps(vfeps,_mm_macc_ps(twovfeps,H,G),Fp);
-            fvdw6            = _mm_mul_ps(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            vfitab           = _mm_add_epi32(vfitab,ifour);
-            Y                = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,1) );
-            G                = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,2) );
-            H                = _mm_load_ps( vftab + _mm_extract_epi32(vfitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Fp               = _mm_macc_ps(vfeps,_mm_macc_ps(H,vfeps,G),F);
-            FF               = _mm_macc_ps(vfeps,_mm_macc_ps(twovfeps,H,G),Fp);
-            fvdw12           = _mm_mul_ps(c12_00,FF);
-            fvdw             = _mm_xor_ps(signbit,_mm_mul_ps(_mm_add_ps(fvdw6,fvdw12),_mm_mul_ps(vftabscale,rinv00)));
-
-            fscal            = _mm_add_ps(felec,fvdw);
-
-            fscal            = _mm_andnot_ps(dummy_mask,fscal);
-
-             /* Update vectorial force */
-            fix0             = _mm_macc_ps(dx00,fscal,fix0);
-            fiy0             = _mm_macc_ps(dy00,fscal,fiy0);
-            fiz0             = _mm_macc_ps(dz00,fscal,fiz0);
-
-            fjptrA             = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
-            fjptrB             = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
-            fjptrC             = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
-            fjptrD             = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
-                                                   _mm_mul_ps(dx00,fscal),
-                                                   _mm_mul_ps(dy00,fscal),
-                                                   _mm_mul_ps(dz00,fscal));
-
-            /* Inner loop uses 86 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0));
-        gmx_mm_update_1pot_ps(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*86);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_128_fma_single.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_128_fma_single.c
deleted file mode 100644
index aa4c0b6b0e..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_128_fma_single.c
+++ /dev/null
@@ -1,864 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS avx_128_fma_single kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_x86_avx_128_fma_single.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_128_fma_single
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_128_fma_single
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B,C,D refer to j loop unrolling done with AVX_128, e.g. for the four different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB,jnrC,jnrD;
-    int              jnrlistA,jnrlistB,jnrlistC,jnrlistD;
-    int              j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    real             *fjptrA,*fjptrB,*fjptrC,*fjptrD;
-    real             scratch[4*DIM];
-    __m128           fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128           ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D;
-    __m128           jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128           dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128           velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128           vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,twogbeps,dvdatmp;
-    __m128           minushalf = _mm_set1_ps(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    __m128           rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    __m128           one_sixth   = _mm_set1_ps(1.0/6.0);
-    __m128           one_twelfth = _mm_set1_ps(1.0/12.0);
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128           rt,vfeps,twovfeps,vftabscale,Y,F,G,H,Fp,VV,FF;
-    real             *vftab;
-    __m128           dummy_mask,cutoff_mask;
-    __m128           signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) );
-    __m128           one     = _mm_set1_ps(1.0);
-    __m128           two     = _mm_set1_ps(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_ps(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_ps(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = jnrC = jnrD = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-    j_coord_offsetC = 0;
-    j_coord_offsetD = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    for(iidx=0;iidx<4*DIM;iidx++)
-    {
-        scratch[iidx] = 0.0;
-    }
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_ps(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm_setzero_ps();
-        fiy0             = _mm_setzero_ps();
-        fiz0             = _mm_setzero_ps();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_ps(facel,_mm_load1_ps(charge+inr+0));
-        isai0            = _mm_load1_ps(invsqrta+inr+0);
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _mm_setzero_ps();
-        vgbsum           = _mm_setzero_ps();
-        vvdwsum          = _mm_setzero_ps();
-        dvdasum          = _mm_setzero_ps();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end && jjnr[jidx+3]>=0; jidx+=4)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            jnrC             = jjnr[jidx+2];
-            jnrD             = jjnr[jidx+3];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = avx128fma_invsqrt_f(rsq00);
-
-            rinvsq00         = _mm_mul_ps(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-            gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,
-                                         vdwparam+vdwioffset0+vdwjidx0C,
-                                         vdwparam+vdwioffset0+vdwjidx0D,
-                                         &c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-#ifdef __XOP__
-            gbeps            = _mm_frcz_ps(rt);
-#else
-            gbeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-#endif
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Fp               = _mm_macc_ps(gbeps,_mm_macc_ps(gbeps,H,G),F);
-            VV               = _mm_macc_ps(gbeps,Fp,Y);
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            twogbeps         = _mm_add_ps(gbeps,gbeps);
-            FF               = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp);
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r00,vgb));
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_msub_ps(velec,rinv00,fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _mm_mul_ps(_mm_mul_ps(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _mm_mul_ps(c6_00,rinvsix);
-            vvdw12           = _mm_mul_ps(c12_00,_mm_mul_ps(rinvsix,rinvsix));
-            vvdw             = _mm_msub_ps(vvdw12,one_twelfth,_mm_mul_ps(vvdw6,one_sixth));
-            fvdw             = _mm_mul_ps(_mm_sub_ps(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _mm_add_ps(velecsum,velec);
-            vgbsum           = _mm_add_ps(vgbsum,vgb);
-            vvdwsum          = _mm_add_ps(vvdwsum,vvdw);
-
-            fscal            = _mm_add_ps(felec,fvdw);
-
-             /* Update vectorial force */
-            fix0             = _mm_macc_ps(dx00,fscal,fix0);
-            fiy0             = _mm_macc_ps(dy00,fscal,fiy0);
-            fiz0             = _mm_macc_ps(dz00,fscal,fiz0);
-
-            fjptrA             = f+j_coord_offsetA;
-            fjptrB             = f+j_coord_offsetB;
-            fjptrC             = f+j_coord_offsetC;
-            fjptrD             = f+j_coord_offsetD;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
-                                                   _mm_mul_ps(dx00,fscal),
-                                                   _mm_mul_ps(dy00,fscal),
-                                                   _mm_mul_ps(dz00,fscal));
-
-            /* Inner loop uses 74 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrlistA         = jjnr[jidx];
-            jnrlistB         = jjnr[jidx+1];
-            jnrlistC         = jjnr[jidx+2];
-            jnrlistD         = jjnr[jidx+3];
-            /* Sign of each element will be negative for non-real atoms.
-             * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
-             * so use it as val = _mm_andnot_ps(mask,val) to clear dummy entries.
-             */
-            dummy_mask = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
-            jnrA       = (jnrlistA>=0) ? jnrlistA : 0;
-            jnrB       = (jnrlistB>=0) ? jnrlistB : 0;
-            jnrC       = (jnrlistC>=0) ? jnrlistC : 0;
-            jnrD       = (jnrlistD>=0) ? jnrlistD : 0;
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = avx128fma_invsqrt_f(rsq00);
-
-            rinvsq00         = _mm_mul_ps(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-            r00              = _mm_andnot_ps(dummy_mask,r00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-            gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,
-                                         vdwparam+vdwioffset0+vdwjidx0C,
-                                         vdwparam+vdwioffset0+vdwjidx0D,
-                                         &c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-#ifdef __XOP__
-            gbeps            = _mm_frcz_ps(rt);
-#else
-            gbeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-#endif
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Fp               = _mm_macc_ps(gbeps,_mm_macc_ps(gbeps,H,G),F);
-            VV               = _mm_macc_ps(gbeps,Fp,Y);
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            twogbeps         = _mm_add_ps(gbeps,gbeps);
-            FF               = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp);
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r00,vgb));
-            dvdatmp          = _mm_andnot_ps(dummy_mask,dvdatmp);
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_msub_ps(velec,rinv00,fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _mm_mul_ps(_mm_mul_ps(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _mm_mul_ps(c6_00,rinvsix);
-            vvdw12           = _mm_mul_ps(c12_00,_mm_mul_ps(rinvsix,rinvsix));
-            vvdw             = _mm_msub_ps(vvdw12,one_twelfth,_mm_mul_ps(vvdw6,one_sixth));
-            fvdw             = _mm_mul_ps(_mm_sub_ps(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _mm_andnot_ps(dummy_mask,velec);
-            velecsum         = _mm_add_ps(velecsum,velec);
-            vgb              = _mm_andnot_ps(dummy_mask,vgb);
-            vgbsum           = _mm_add_ps(vgbsum,vgb);
-            vvdw             = _mm_andnot_ps(dummy_mask,vvdw);
-            vvdwsum          = _mm_add_ps(vvdwsum,vvdw);
-
-            fscal            = _mm_add_ps(felec,fvdw);
-
-            fscal            = _mm_andnot_ps(dummy_mask,fscal);
-
-             /* Update vectorial force */
-            fix0             = _mm_macc_ps(dx00,fscal,fix0);
-            fiy0             = _mm_macc_ps(dy00,fscal,fiy0);
-            fiz0             = _mm_macc_ps(dz00,fscal,fiz0);
-
-            fjptrA             = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
-            fjptrB             = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
-            fjptrC             = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
-            fjptrD             = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
-                                                   _mm_mul_ps(dx00,fscal),
-                                                   _mm_mul_ps(dy00,fscal),
-                                                   _mm_mul_ps(dz00,fscal));
-
-            /* Inner loop uses 75 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_mm_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_mm_update_1pot_ps(vgbsum,kernel_data->energygrp_polarization+ggid);
-        gmx_mm_update_1pot_ps(vvdwsum,kernel_data->energygrp_vdw+ggid);
-        dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0));
-        gmx_mm_update_1pot_ps(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 10 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*75);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_128_fma_single
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_128_fma_single
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B,C,D refer to j loop unrolling done with AVX_128, e.g. for the four different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB,jnrC,jnrD;
-    int              jnrlistA,jnrlistB,jnrlistC,jnrlistD;
-    int              j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    real             *fjptrA,*fjptrB,*fjptrC,*fjptrD;
-    real             scratch[4*DIM];
-    __m128           fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128           ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D;
-    __m128           jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128           dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128           velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128           vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,twogbeps,dvdatmp;
-    __m128           minushalf = _mm_set1_ps(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    __m128           rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    __m128           one_sixth   = _mm_set1_ps(1.0/6.0);
-    __m128           one_twelfth = _mm_set1_ps(1.0/12.0);
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128           rt,vfeps,twovfeps,vftabscale,Y,F,G,H,Fp,VV,FF;
-    real             *vftab;
-    __m128           dummy_mask,cutoff_mask;
-    __m128           signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) );
-    __m128           one     = _mm_set1_ps(1.0);
-    __m128           two     = _mm_set1_ps(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_ps(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_ps(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = jnrC = jnrD = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-    j_coord_offsetC = 0;
-    j_coord_offsetD = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    for(iidx=0;iidx<4*DIM;iidx++)
-    {
-        scratch[iidx] = 0.0;
-    }
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_ps(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm_setzero_ps();
-        fiy0             = _mm_setzero_ps();
-        fiz0             = _mm_setzero_ps();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_ps(facel,_mm_load1_ps(charge+inr+0));
-        isai0            = _mm_load1_ps(invsqrta+inr+0);
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        dvdasum          = _mm_setzero_ps();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end && jjnr[jidx+3]>=0; jidx+=4)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            jnrC             = jjnr[jidx+2];
-            jnrD             = jjnr[jidx+3];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = avx128fma_invsqrt_f(rsq00);
-
-            rinvsq00         = _mm_mul_ps(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-            gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,
-                                         vdwparam+vdwioffset0+vdwjidx0C,
-                                         vdwparam+vdwioffset0+vdwjidx0D,
-                                         &c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-#ifdef __XOP__
-            gbeps            = _mm_frcz_ps(rt);
-#else
-            gbeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-#endif
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Fp               = _mm_macc_ps(gbeps,_mm_macc_ps(gbeps,H,G),F);
-            VV               = _mm_macc_ps(gbeps,Fp,Y);
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            twogbeps         = _mm_add_ps(gbeps,gbeps);
-            FF               = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp);
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r00,vgb));
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_msub_ps(velec,rinv00,fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _mm_mul_ps(_mm_mul_ps(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _mm_mul_ps(_mm_msub_ps(c12_00,rinvsix,c6_00),_mm_mul_ps(rinvsix,rinvsq00));
-
-            fscal            = _mm_add_ps(felec,fvdw);
-
-             /* Update vectorial force */
-            fix0             = _mm_macc_ps(dx00,fscal,fix0);
-            fiy0             = _mm_macc_ps(dy00,fscal,fiy0);
-            fiz0             = _mm_macc_ps(dz00,fscal,fiz0);
-
-            fjptrA             = f+j_coord_offsetA;
-            fjptrB             = f+j_coord_offsetB;
-            fjptrC             = f+j_coord_offsetC;
-            fjptrD             = f+j_coord_offsetD;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
-                                                   _mm_mul_ps(dx00,fscal),
-                                                   _mm_mul_ps(dy00,fscal),
-                                                   _mm_mul_ps(dz00,fscal));
-
-            /* Inner loop uses 67 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrlistA         = jjnr[jidx];
-            jnrlistB         = jjnr[jidx+1];
-            jnrlistC         = jjnr[jidx+2];
-            jnrlistD         = jjnr[jidx+3];
-            /* Sign of each element will be negative for non-real atoms.
-             * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
-             * so use it as val = _mm_andnot_ps(mask,val) to clear dummy entries.
-             */
-            dummy_mask = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
-            jnrA       = (jnrlistA>=0) ? jnrlistA : 0;
-            jnrB       = (jnrlistB>=0) ? jnrlistB : 0;
-            jnrC       = (jnrlistC>=0) ? jnrlistC : 0;
-            jnrD       = (jnrlistD>=0) ? jnrlistD : 0;
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = avx128fma_invsqrt_f(rsq00);
-
-            rinvsq00         = _mm_mul_ps(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-            r00              = _mm_andnot_ps(dummy_mask,r00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-            gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,
-                                         vdwparam+vdwioffset0+vdwjidx0C,
-                                         vdwparam+vdwioffset0+vdwjidx0D,
-                                         &c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-#ifdef __XOP__
-            gbeps            = _mm_frcz_ps(rt);
-#else
-            gbeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-#endif
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Fp               = _mm_macc_ps(gbeps,_mm_macc_ps(gbeps,H,G),F);
-            VV               = _mm_macc_ps(gbeps,Fp,Y);
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            twogbeps         = _mm_add_ps(gbeps,gbeps);
-            FF               = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp);
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r00,vgb));
-            dvdatmp          = _mm_andnot_ps(dummy_mask,dvdatmp);
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_msub_ps(velec,rinv00,fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _mm_mul_ps(_mm_mul_ps(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _mm_mul_ps(_mm_msub_ps(c12_00,rinvsix,c6_00),_mm_mul_ps(rinvsix,rinvsq00));
-
-            fscal            = _mm_add_ps(felec,fvdw);
-
-            fscal            = _mm_andnot_ps(dummy_mask,fscal);
-
-             /* Update vectorial force */
-            fix0             = _mm_macc_ps(dx00,fscal,fix0);
-            fiy0             = _mm_macc_ps(dy00,fscal,fiy0);
-            fiz0             = _mm_macc_ps(dz00,fscal,fiz0);
-
-            fjptrA             = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
-            fjptrB             = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
-            fjptrC             = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
-            fjptrD             = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
-                                                   _mm_mul_ps(dx00,fscal),
-                                                   _mm_mul_ps(dy00,fscal),
-                                                   _mm_mul_ps(dz00,fscal));
-
-            /* Inner loop uses 68 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0));
-        gmx_mm_update_1pot_ps(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*68);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_128_fma_single.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_128_fma_single.c
deleted file mode 100644
index 737a4e83fa..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_128_fma_single.c
+++ /dev/null
@@ -1,769 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS avx_128_fma_single kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_x86_avx_128_fma_single.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_128_fma_single
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_128_fma_single
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B,C,D refer to j loop unrolling done with AVX_128, e.g. for the four different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB,jnrC,jnrD;
-    int              jnrlistA,jnrlistB,jnrlistC,jnrlistD;
-    int              j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    real             *fjptrA,*fjptrB,*fjptrC,*fjptrD;
-    real             scratch[4*DIM];
-    __m128           fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128           ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D;
-    __m128           jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128           dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128           velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128           vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,twogbeps,dvdatmp;
-    __m128           minushalf = _mm_set1_ps(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128           rt,vfeps,twovfeps,vftabscale,Y,F,G,H,Fp,VV,FF;
-    real             *vftab;
-    __m128           dummy_mask,cutoff_mask;
-    __m128           signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) );
-    __m128           one     = _mm_set1_ps(1.0);
-    __m128           two     = _mm_set1_ps(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_ps(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_ps(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = jnrC = jnrD = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-    j_coord_offsetC = 0;
-    j_coord_offsetD = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    for(iidx=0;iidx<4*DIM;iidx++)
-    {
-        scratch[iidx] = 0.0;
-    }
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_ps(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm_setzero_ps();
-        fiy0             = _mm_setzero_ps();
-        fiz0             = _mm_setzero_ps();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_ps(facel,_mm_load1_ps(charge+inr+0));
-        isai0            = _mm_load1_ps(invsqrta+inr+0);
-
-        /* Reset potential sums */
-        velecsum         = _mm_setzero_ps();
-        vgbsum           = _mm_setzero_ps();
-        dvdasum          = _mm_setzero_ps();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end && jjnr[jidx+3]>=0; jidx+=4)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            jnrC             = jjnr[jidx+2];
-            jnrD             = jjnr[jidx+3];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = avx128fma_invsqrt_f(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-#ifdef __XOP__
-            gbeps            = _mm_frcz_ps(rt);
-#else
-            gbeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-#endif
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Fp               = _mm_macc_ps(gbeps,_mm_macc_ps(gbeps,H,G),F);
-            VV               = _mm_macc_ps(gbeps,Fp,Y);
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            twogbeps         = _mm_add_ps(gbeps,gbeps);
-            FF               = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp);
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r00,vgb));
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_msub_ps(velec,rinv00,fgb),rinv00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _mm_add_ps(velecsum,velec);
-            vgbsum           = _mm_add_ps(vgbsum,vgb);
-
-            fscal            = felec;
-
-             /* Update vectorial force */
-            fix0             = _mm_macc_ps(dx00,fscal,fix0);
-            fiy0             = _mm_macc_ps(dy00,fscal,fiy0);
-            fiz0             = _mm_macc_ps(dz00,fscal,fiz0);
-
-            fjptrA             = f+j_coord_offsetA;
-            fjptrB             = f+j_coord_offsetB;
-            fjptrC             = f+j_coord_offsetC;
-            fjptrD             = f+j_coord_offsetD;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
-                                                   _mm_mul_ps(dx00,fscal),
-                                                   _mm_mul_ps(dy00,fscal),
-                                                   _mm_mul_ps(dz00,fscal));
-
-            /* Inner loop uses 61 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrlistA         = jjnr[jidx];
-            jnrlistB         = jjnr[jidx+1];
-            jnrlistC         = jjnr[jidx+2];
-            jnrlistD         = jjnr[jidx+3];
-            /* Sign of each element will be negative for non-real atoms.
-             * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
-             * so use it as val = _mm_andnot_ps(mask,val) to clear dummy entries.
-             */
-            dummy_mask = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
-            jnrA       = (jnrlistA>=0) ? jnrlistA : 0;
-            jnrB       = (jnrlistB>=0) ? jnrlistB : 0;
-            jnrC       = (jnrlistC>=0) ? jnrlistC : 0;
-            jnrD       = (jnrlistD>=0) ? jnrlistD : 0;
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = avx128fma_invsqrt_f(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-            r00              = _mm_andnot_ps(dummy_mask,r00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-#ifdef __XOP__
-            gbeps            = _mm_frcz_ps(rt);
-#else
-            gbeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-#endif
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Fp               = _mm_macc_ps(gbeps,_mm_macc_ps(gbeps,H,G),F);
-            VV               = _mm_macc_ps(gbeps,Fp,Y);
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            twogbeps         = _mm_add_ps(gbeps,gbeps);
-            FF               = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp);
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r00,vgb));
-            dvdatmp          = _mm_andnot_ps(dummy_mask,dvdatmp);
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_msub_ps(velec,rinv00,fgb),rinv00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _mm_andnot_ps(dummy_mask,velec);
-            velecsum         = _mm_add_ps(velecsum,velec);
-            vgb              = _mm_andnot_ps(dummy_mask,vgb);
-            vgbsum           = _mm_add_ps(vgbsum,vgb);
-
-            fscal            = felec;
-
-            fscal            = _mm_andnot_ps(dummy_mask,fscal);
-
-             /* Update vectorial force */
-            fix0             = _mm_macc_ps(dx00,fscal,fix0);
-            fiy0             = _mm_macc_ps(dy00,fscal,fiy0);
-            fiz0             = _mm_macc_ps(dz00,fscal,fiz0);
-
-            fjptrA             = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
-            fjptrB             = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
-            fjptrC             = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
-            fjptrD             = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
-                                                   _mm_mul_ps(dx00,fscal),
-                                                   _mm_mul_ps(dy00,fscal),
-                                                   _mm_mul_ps(dz00,fscal));
-
-            /* Inner loop uses 62 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_mm_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_mm_update_1pot_ps(vgbsum,kernel_data->energygrp_polarization+ggid);
-        dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0));
-        gmx_mm_update_1pot_ps(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 9 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*9 + inneriter*62);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_128_fma_single
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_128_fma_single
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B,C,D refer to j loop unrolling done with AVX_128, e.g. for the four different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB,jnrC,jnrD;
-    int              jnrlistA,jnrlistB,jnrlistC,jnrlistD;
-    int              j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    real             *fjptrA,*fjptrB,*fjptrC,*fjptrD;
-    real             scratch[4*DIM];
-    __m128           fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128           ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D;
-    __m128           jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128           dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128           velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128           vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,twogbeps,dvdatmp;
-    __m128           minushalf = _mm_set1_ps(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128           rt,vfeps,twovfeps,vftabscale,Y,F,G,H,Fp,VV,FF;
-    real             *vftab;
-    __m128           dummy_mask,cutoff_mask;
-    __m128           signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) );
-    __m128           one     = _mm_set1_ps(1.0);
-    __m128           two     = _mm_set1_ps(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_ps(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_ps(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = jnrC = jnrD = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-    j_coord_offsetC = 0;
-    j_coord_offsetD = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    for(iidx=0;iidx<4*DIM;iidx++)
-    {
-        scratch[iidx] = 0.0;
-    }
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_ps(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm_setzero_ps();
-        fiy0             = _mm_setzero_ps();
-        fiz0             = _mm_setzero_ps();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_ps(facel,_mm_load1_ps(charge+inr+0));
-        isai0            = _mm_load1_ps(invsqrta+inr+0);
-
-        dvdasum          = _mm_setzero_ps();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end && jjnr[jidx+3]>=0; jidx+=4)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            jnrC             = jjnr[jidx+2];
-            jnrD             = jjnr[jidx+3];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = avx128fma_invsqrt_f(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-#ifdef __XOP__
-            gbeps            = _mm_frcz_ps(rt);
-#else
-            gbeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-#endif
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Fp               = _mm_macc_ps(gbeps,_mm_macc_ps(gbeps,H,G),F);
-            VV               = _mm_macc_ps(gbeps,Fp,Y);
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            twogbeps         = _mm_add_ps(gbeps,gbeps);
-            FF               = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp);
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r00,vgb));
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_msub_ps(velec,rinv00,fgb),rinv00);
-
-            fscal            = felec;
-
-             /* Update vectorial force */
-            fix0             = _mm_macc_ps(dx00,fscal,fix0);
-            fiy0             = _mm_macc_ps(dy00,fscal,fiy0);
-            fiz0             = _mm_macc_ps(dz00,fscal,fiz0);
-
-            fjptrA             = f+j_coord_offsetA;
-            fjptrB             = f+j_coord_offsetB;
-            fjptrC             = f+j_coord_offsetC;
-            fjptrD             = f+j_coord_offsetD;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
-                                                   _mm_mul_ps(dx00,fscal),
-                                                   _mm_mul_ps(dy00,fscal),
-                                                   _mm_mul_ps(dz00,fscal));
-
-            /* Inner loop uses 59 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrlistA         = jjnr[jidx];
-            jnrlistB         = jjnr[jidx+1];
-            jnrlistC         = jjnr[jidx+2];
-            jnrlistD         = jjnr[jidx+3];
-            /* Sign of each element will be negative for non-real atoms.
-             * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
-             * so use it as val = _mm_andnot_ps(mask,val) to clear dummy entries.
-             */
-            dummy_mask = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
-            jnrA       = (jnrlistA>=0) ? jnrlistA : 0;
-            jnrB       = (jnrlistB>=0) ? jnrlistB : 0;
-            jnrC       = (jnrlistC>=0) ? jnrlistC : 0;
-            jnrD       = (jnrlistD>=0) ? jnrlistD : 0;
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = avx128fma_invsqrt_f(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-            r00              = _mm_andnot_ps(dummy_mask,r00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-#ifdef __XOP__
-            gbeps            = _mm_frcz_ps(rt);
-#else
-            gbeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-#endif
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Fp               = _mm_macc_ps(gbeps,_mm_macc_ps(gbeps,H,G),F);
-            VV               = _mm_macc_ps(gbeps,Fp,Y);
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            twogbeps         = _mm_add_ps(gbeps,gbeps);
-            FF               = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp);
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r00,vgb));
-            dvdatmp          = _mm_andnot_ps(dummy_mask,dvdatmp);
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_msub_ps(velec,rinv00,fgb),rinv00);
-
-            fscal            = felec;
-
-            fscal            = _mm_andnot_ps(dummy_mask,fscal);
-
-             /* Update vectorial force */
-            fix0             = _mm_macc_ps(dx00,fscal,fix0);
-            fiy0             = _mm_macc_ps(dy00,fscal,fiy0);
-            fiz0             = _mm_macc_ps(dz00,fscal,fiz0);
-
-            fjptrA             = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
-            fjptrB             = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
-            fjptrC             = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
-            fjptrD             = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
-                                                   _mm_mul_ps(dx00,fscal),
-                                                   _mm_mul_ps(dy00,fscal),
-                                                   _mm_mul_ps(dz00,fscal));
-
-            /* Inner loop uses 60 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0));
-        gmx_mm_update_1pot_ps(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*7 + inneriter*60);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.c
index c87976462a..6489837de0 100644
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.c
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.c
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -51,6 +51,36 @@ nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_avx_128_fma_single;
 nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_128_fma_single;
 nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_128_fma_single;
 nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_128_fma_single;
 nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_avx_128_fma_single;
 nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_avx_128_fma_single;
 nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_avx_128_fma_single;
@@ -141,72 +171,6 @@ nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_avx_128_fma_single;
 nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_128_fma_single;
 nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_128_fma_single;
 nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_128_fma_single;
-nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_128_fma_single;
 nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_128_fma_single;
 nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_128_fma_single;
 nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_128_fma_single;
@@ -277,6 +241,36 @@ nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_128_fma_single;
 nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_128_fma_single;
 nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_128_fma_single;
 nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_128_fma_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_128_fma_single;
 
 
 nb_kernel_info_t
@@ -294,6 +288,36 @@ nb_kernel_info_t
     { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
     { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
     { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "PotentialAndForce" },
     { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "LJEwald", "None", "Water3Particle", "", "PotentialAndForce" },
@@ -384,72 +408,6 @@ nb_kernel_info_t
     { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "Force" },
     { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "PotentialAndForce" },
     { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
     { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
@@ -519,7 +477,37 @@ nb_kernel_info_t
     { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
     { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
     { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }
+    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }
 };
 
 int
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_template_avx_128_fma_single.pre b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_template_avx_128_fma_single.pre
index fc3469af5e..29d3517924 100644
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_template_avx_128_fma_single.pre
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_template_avx_128_fma_single.pre
@@ -2,7 +2,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -131,12 +131,6 @@ void
     __m128           velec,felec,velecsum,facel,crf,krf,krf2;
     real             *charge;
     /* #endif */
-    /* #if 'GeneralizedBorn' in KERNEL_ELEC */
-    __m128i          gbitab;
-    __m128           vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,twogbeps,dvdatmp;
-    __m128           minushalf = _mm_set1_ps(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    /* #endif */
     /* #if KERNEL_VDW != 'None' */
     int              nvdwtype;
     __m128           rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
@@ -145,7 +139,7 @@ void
     __m128           one_sixth   = _mm_set1_ps(1.0/6.0);
     __m128           one_twelfth = _mm_set1_ps(1.0/12.0);
     /* #endif */
-    /* #if 'Table' in KERNEL_ELEC or 'GeneralizedBorn' in KERNEL_ELEC or 'Table' in KERNEL_VDW */
+    /* #if 'Table' in KERNEL_ELEC or 'Table' in KERNEL_VDW */
     __m128i          vfitab;
     __m128i          ifour       = _mm_set1_epi32(4);
     __m128           rt,vfeps,twovfeps,vftabscale,Y,F,G,H,Fp,VV,FF;
@@ -233,14 +227,6 @@ void
      /*     #endif */
     /* #endif */
 
-    /* #if KERNEL_ELEC=='GeneralizedBorn' */
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_ps(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-    /* #endif */
-
     /* #if 'Water' in GEOMETRY_I */
     /* Setup water-specific parameters */
     inr              = nlist->iinr[0];
@@ -374,9 +360,6 @@ void
         /*     #for I in PARTICLES_ELEC_I */
         iq{I}              = _mm_mul_ps(facel,_mm_load1_ps(charge+inr+{I}));
         /*         #define OUTERFLOPS OUTERFLOPS+1 */
-        /*         #if KERNEL_ELEC=='GeneralizedBorn' */
-        isai{I}            = _mm_load1_ps(invsqrta+inr+{I});
-        /*         #endif */
         /*     #endfor */
         /*     #for I in PARTICLES_VDW_I */
         vdwioffset{I}      = 2*nvdwtype*vdwtype[inr+{I}];
@@ -388,16 +371,10 @@ void
         /*     #if KERNEL_ELEC != 'None' */
         velecsum         = _mm_setzero_ps();
         /*     #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC */
-        vgbsum           = _mm_setzero_ps();
-        /*     #endif */
         /*     #if KERNEL_VDW != 'None' */
         vvdwsum          = _mm_setzero_ps();
         /*     #endif */
         /* #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */
-        dvdasum          = _mm_setzero_ps();
-        /*     #endif */
 
         /* #for ROUND in ['Loop','Epilogue'] */
 
@@ -502,10 +479,6 @@ void
             /*     #for J in PARTICLES_ELEC_J */
             jq{J}              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+{J},charge+jnrB+{J},
                                                               charge+jnrC+{J},charge+jnrD+{J});
-            /*         #if KERNEL_ELEC=='GeneralizedBorn' */
-            isaj{J}            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+{J},invsqrta+jnrB+{J},
-                                                              invsqrta+jnrC+{J},invsqrta+jnrD+{J});
-            /*         #endif */
             /*     #endfor */
             /*     #for J in PARTICLES_VDW_J */
             vdwjidx{J}A        = 2*vdwtype[jnrA+{J}];
@@ -621,67 +594,6 @@ void
             /*                 #define INNERFLOPS INNERFLOPS+3 */
             /*             #endif */
 
-            /*         #elif KERNEL_ELEC=='GeneralizedBorn' */
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai{I},isaj{J});
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq{I}{J},_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-            /*             #define INNERFLOPS INNERFLOPS+5 */
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r{I}{J},gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-#ifdef __XOP__
-            gbeps            = _mm_frcz_ps(rt);
-#else
-            gbeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-#endif
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + _mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Fp               = _mm_macc_ps(gbeps,_mm_macc_ps(gbeps,H,G),F);
-            VV               = _mm_macc_ps(gbeps,Fp,Y);
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-            /*             #define INNERFLOPS INNERFLOPS+10 */
-
-            /*             #if 'Force' in KERNEL_VF */
-            twogbeps         = _mm_add_ps(gbeps,gbeps);
-            FF               = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp);
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r{I}{J},vgb));
-            /*                 #if ROUND == 'Epilogue' */
-            dvdatmp          = _mm_andnot_ps(dummy_mask,dvdatmp);
-            /*                 #endif */
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            /*                 #if ROUND == 'Loop' */
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            /*                 #else */
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            /*                 #endif */
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj{J},isaj{J})));
-            /*                 #define INNERFLOPS INNERFLOPS+13 */
-            /*             #endif */
-            velec            = _mm_mul_ps(qq{I}{J},rinv{I}{J});
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #if 'Force' in KERNEL_VF */
-            felec            = _mm_mul_ps(_mm_msub_ps(velec,rinv{I}{J},fgb),rinv{I}{J});
-            /*                 #define INNERFLOPS INNERFLOPS+3 */
-            /*             #endif */
-
             /*         #elif KERNEL_ELEC=='Ewald' */
             /* EWALD ELECTROSTATICS */
 
@@ -907,17 +819,6 @@ void
             /*             #endif */
             velecsum         = _mm_add_ps(velecsum,velec);
             /*             #define INNERFLOPS INNERFLOPS+1 */
-            /*             #if KERNEL_ELEC=='GeneralizedBorn' */
-            /*             #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */
-            vgb              = _mm_and_ps(vgb,cutoff_mask);
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #endif                                       */
-            /*             #if ROUND == 'Epilogue' */
-            vgb              = _mm_andnot_ps(dummy_mask,vgb);
-            /*             #endif */
-            vgbsum           = _mm_add_ps(vgbsum,vgb);
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #endif */
             /*         #endif */
             /*         #if 'vdw' in INTERACTION_FLAGS[I][J] */
             /*     ## Note special check for TIP4P-TIP4P. Since we are cutting of all hydrogen interactions we also cut the LJ-only O-O interaction */
@@ -1070,19 +971,11 @@ void
         gmx_mm_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid);
         /*         #define OUTERFLOPS OUTERFLOPS+1 */
         /*     #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC */
-        gmx_mm_update_1pot_ps(vgbsum,kernel_data->energygrp_polarization+ggid);
-        /*         #define OUTERFLOPS OUTERFLOPS+1 */
-        /*     #endif */
         /*     #if KERNEL_VDW != 'None' */
         gmx_mm_update_1pot_ps(vvdwsum,kernel_data->energygrp_vdw+ggid);
         /*         #define OUTERFLOPS OUTERFLOPS+1 */
         /*     #endif */
         /* #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */
-        dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai{I},isai{I}));
-        gmx_mm_update_1pot_ps(dvdasum,dvda+inr);
-        /*     #endif */
 
         /* Increment number of inner iterations */
         inneriter                  += j_index_end - j_index_start;
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/make_nb_kernel_avx_256_double.py b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/make_nb_kernel_avx_256_double.py
index d4cfc54224..3240b74352 100755
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/make_nb_kernel_avx_256_double.py
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/make_nb_kernel_avx_256_double.py
@@ -2,7 +2,7 @@
 #
 # This file is part of the GROMACS molecular simulation package.
 #
-# Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
+# Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 # and including many others, as listed in the AUTHORS file in the
 # top-level source directory and at http://www.gromacs.org.
@@ -105,7 +105,6 @@ ElectrostaticsList = {
     'None'                    : [],
     'Coulomb'                 : ['rinv','rinvsq'],
     'ReactionField'           : ['rinv','rinvsq'],
-    'GeneralizedBorn'         : ['rinv','r'],
     'CubicSplineTable'        : ['rinv','r','table'],
     'Ewald'                   : ['rinv','rinvsq','r'],
 }
@@ -190,7 +189,6 @@ Abbreviation = {
     'Coulomb'                 : 'Coul',
     'Ewald'                   : 'Ew',
     'ReactionField'           : 'RF',
-    'GeneralizedBorn'         : 'GB',
     'CubicSplineTable'        : 'CSTab',
     'LennardJones'            : 'LJ',
     'Buckingham'              : 'Bham',
@@ -269,7 +267,7 @@ def KeepKernel(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,Kernel
         return 0
 
     # No need for LJ-only water optimization, or water optimization with implicit solvent.
-    if('Water' in KernelGeom[0] and (KernelElec=='None' or 'GeneralizedBorn' in KernelElec)):
+    if('Water' in KernelGeom[0] and KernelElec=='None'):
         return 0
 
     # Non-matching table settings are pointless
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_256_double.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_256_double.c
deleted file mode 100644
index 8d3df8b683..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_256_double.c
+++ /dev/null
@@ -1,972 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS avx_256_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_x86_avx_256_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_256_double
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_256_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or 
-     * just 0 for non-waters.
-     * Suffixes A,B,C,D refer to j loop unrolling done with AVX, e.g. for the four different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB,jnrC,jnrD;
-    int              jnrlistA,jnrlistB,jnrlistC,jnrlistD;
-    int              jnrlistE,jnrlistF,jnrlistG,jnrlistH;
-    int              j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    real             *fjptrA,*fjptrB,*fjptrC,*fjptrD;
-    real             scratch[4*DIM];
-    __m256d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    real *           vdwioffsetptr0;
-    __m256d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D;
-    __m256d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m256d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m256d          velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m256d          vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp;
-    __m256d          minushalf = _mm256_set1_pd(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    __m256d          rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    __m256d          one_sixth   = _mm256_set1_pd(1.0/6.0);
-    __m256d          one_twelfth = _mm256_set1_pd(1.0/12.0);
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m256d          rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m256d          dummy_mask,cutoff_mask;
-    __m128           tmpmask0,tmpmask1;
-    __m256d          signbit = _mm256_castsi256_pd( _mm256_set1_epi32(0x80000000) );
-    __m256d          one     = _mm256_set1_pd(1.0);
-    __m256d          two     = _mm256_set1_pd(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm256_set1_pd(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = _mm256_set1_pd(kernel_data->table_vdw->scale);
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm256_set1_pd(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm256_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = jnrC = jnrD = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-    j_coord_offsetC = 0;
-    j_coord_offsetD = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    for(iidx=0;iidx<4*DIM;iidx++)
-    {
-        scratch[iidx] = 0.0;
-    }
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm256_load_shift_and_1rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm256_setzero_pd();
-        fiy0             = _mm256_setzero_pd();
-        fiz0             = _mm256_setzero_pd();
-
-        /* Load parameters for i particles */
-        iq0              = _mm256_mul_pd(facel,_mm256_set1_pd(charge[inr+0]));
-        isai0            = _mm256_set1_pd(invsqrta[inr+0]);
-        vdwioffsetptr0   = vdwparam+2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _mm256_setzero_pd();
-        vgbsum           = _mm256_setzero_pd();
-        vvdwsum          = _mm256_setzero_pd();
-        dvdasum          = _mm256_setzero_pd();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end && jjnr[jidx+3]>=0; jidx+=4)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            jnrC             = jjnr[jidx+2];
-            jnrD             = jjnr[jidx+3];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm256_load_1rvec_4ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
-                                                 x+j_coord_offsetC,x+j_coord_offsetD,
-                                                 &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm256_sub_pd(ix0,jx0);
-            dy00             = _mm256_sub_pd(iy0,jy0);
-            dz00             = _mm256_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm256_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = avx256_invsqrt_d(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm256_load_4real_swizzle_pd(charge+jnrA+0,charge+jnrB+0,
-                                                                 charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm256_load_4real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                                 invsqrta+jnrC+0,invsqrta+jnrD+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm256_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm256_mul_pd(iq0,jq0);
-            gmx_mm256_load_4pair_swizzle_pd(vdwioffsetptr0+vdwjidx0A,
-                                            vdwioffsetptr0+vdwjidx0B,
-                                            vdwioffsetptr0+vdwjidx0C,
-                                            vdwioffsetptr0+vdwjidx0D,
-                                            &c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _mm256_mul_pd(r00,vftabscale);
-            vfitab           = _mm256_cvttpd_epi32(rt);
-            vfeps            = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR));
-            vfitab           = _mm_slli_epi32(vfitab,3);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm256_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm256_xor_pd(signbit,_mm256_mul_pd(qq00,_mm256_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm256_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm256_mul_pd(r00,gbscale);
-            gbitab           = _mm256_cvttpd_epi32(rt);
-            gbeps            = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-            Y                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,1) );
-            G                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,2) );
-            H                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,3) );
-            GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H);
-            Heps             = _mm256_mul_pd(gbeps,H);
-            Fp               = _mm256_add_pd(F,_mm256_mul_pd(gbeps,_mm256_add_pd(G,Heps)));
-            VV               = _mm256_add_pd(Y,_mm256_mul_pd(gbeps,Fp));
-            vgb              = _mm256_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps))));
-            fgb              = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale));
-            dvdatmp          = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r00)));
-            dvdasum          = _mm256_add_pd(dvdasum,dvdatmp);
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            gmx_mm256_increment_4real_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,
-                                                 _mm256_mul_pd(dvdatmp,_mm256_mul_pd(isaj0,isaj0)));
-            velec            = _mm256_mul_pd(qq00,rinv00);
-            felec            = _mm256_mul_pd(_mm256_sub_pd(_mm256_mul_pd(velec,rinv00),fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,0) );
-            F                = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,1) );
-            G                = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,2) );
-            H                = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,3) );
-            GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H);
-            Heps             = _mm256_mul_pd(vfeps,H);
-            Fp               = _mm256_add_pd(F,_mm256_mul_pd(vfeps,_mm256_add_pd(G,Heps)));
-            VV               = _mm256_add_pd(Y,_mm256_mul_pd(vfeps,Fp));
-            vvdw6            = _mm256_mul_pd(c6_00,VV);
-            FF               = _mm256_add_pd(Fp,_mm256_mul_pd(vfeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps))));
-            fvdw6            = _mm256_mul_pd(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            vfitab           = _mm_add_epi32(vfitab,ifour);
-            Y                = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,0) );
-            F                = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,1) );
-            G                = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,2) );
-            H                = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,3) );
-            GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H);
-            Heps             = _mm256_mul_pd(vfeps,H);
-            Fp               = _mm256_add_pd(F,_mm256_mul_pd(vfeps,_mm256_add_pd(G,Heps)));
-            VV               = _mm256_add_pd(Y,_mm256_mul_pd(vfeps,Fp));
-            vvdw12           = _mm256_mul_pd(c12_00,VV);
-            FF               = _mm256_add_pd(Fp,_mm256_mul_pd(vfeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps))));
-            fvdw12           = _mm256_mul_pd(c12_00,FF);
-            vvdw             = _mm256_add_pd(vvdw12,vvdw6);
-            fvdw             = _mm256_xor_pd(signbit,_mm256_mul_pd(_mm256_add_pd(fvdw6,fvdw12),_mm256_mul_pd(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _mm256_add_pd(velecsum,velec);
-            vgbsum           = _mm256_add_pd(vgbsum,vgb);
-            vvdwsum          = _mm256_add_pd(vvdwsum,vvdw);
-
-            fscal            = _mm256_add_pd(felec,fvdw);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm256_mul_pd(fscal,dx00);
-            ty               = _mm256_mul_pd(fscal,dy00);
-            tz               = _mm256_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm256_add_pd(fix0,tx);
-            fiy0             = _mm256_add_pd(fiy0,ty);
-            fiz0             = _mm256_add_pd(fiz0,tz);
-
-            fjptrA             = f+j_coord_offsetA;
-            fjptrB             = f+j_coord_offsetB;
-            fjptrC             = f+j_coord_offsetC;
-            fjptrD             = f+j_coord_offsetD;
-            gmx_mm256_decrement_1rvec_4ptr_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
-            /* Inner loop uses 91 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrlistA         = jjnr[jidx];
-            jnrlistB         = jjnr[jidx+1];
-            jnrlistC         = jjnr[jidx+2];
-            jnrlistD         = jjnr[jidx+3];
-            /* Sign of each element will be negative for non-real atoms.
-             * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
-             * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
-             */
-            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
-
-            tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
-            tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
-            dummy_mask = _mm256_castps_pd(gmx_mm256_set_m128(tmpmask1,tmpmask0));
-
-            jnrA       = (jnrlistA>=0) ? jnrlistA : 0;
-            jnrB       = (jnrlistB>=0) ? jnrlistB : 0;
-            jnrC       = (jnrlistC>=0) ? jnrlistC : 0;
-            jnrD       = (jnrlistD>=0) ? jnrlistD : 0;
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm256_load_1rvec_4ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
-                                                 x+j_coord_offsetC,x+j_coord_offsetD,
-                                                 &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm256_sub_pd(ix0,jx0);
-            dy00             = _mm256_sub_pd(iy0,jy0);
-            dz00             = _mm256_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm256_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = avx256_invsqrt_d(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm256_load_4real_swizzle_pd(charge+jnrA+0,charge+jnrB+0,
-                                                                 charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm256_load_4real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                                 invsqrta+jnrC+0,invsqrta+jnrD+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm256_mul_pd(rsq00,rinv00);
-            r00              = _mm256_andnot_pd(dummy_mask,r00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm256_mul_pd(iq0,jq0);
-            gmx_mm256_load_4pair_swizzle_pd(vdwioffsetptr0+vdwjidx0A,
-                                            vdwioffsetptr0+vdwjidx0B,
-                                            vdwioffsetptr0+vdwjidx0C,
-                                            vdwioffsetptr0+vdwjidx0D,
-                                            &c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _mm256_mul_pd(r00,vftabscale);
-            vfitab           = _mm256_cvttpd_epi32(rt);
-            vfeps            = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR));
-            vfitab           = _mm_slli_epi32(vfitab,3);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm256_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm256_xor_pd(signbit,_mm256_mul_pd(qq00,_mm256_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm256_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm256_mul_pd(r00,gbscale);
-            gbitab           = _mm256_cvttpd_epi32(rt);
-            gbeps            = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-            Y                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,1) );
-            G                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,2) );
-            H                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,3) );
-            GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H);
-            Heps             = _mm256_mul_pd(gbeps,H);
-            Fp               = _mm256_add_pd(F,_mm256_mul_pd(gbeps,_mm256_add_pd(G,Heps)));
-            VV               = _mm256_add_pd(Y,_mm256_mul_pd(gbeps,Fp));
-            vgb              = _mm256_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps))));
-            fgb              = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale));
-            dvdatmp          = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r00)));
-            dvdatmp          = _mm256_andnot_pd(dummy_mask,dvdatmp);
-            dvdasum          = _mm256_add_pd(dvdasum,dvdatmp);
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            gmx_mm256_increment_4real_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,
-                                                 _mm256_mul_pd(dvdatmp,_mm256_mul_pd(isaj0,isaj0)));
-            velec            = _mm256_mul_pd(qq00,rinv00);
-            felec            = _mm256_mul_pd(_mm256_sub_pd(_mm256_mul_pd(velec,rinv00),fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,0) );
-            F                = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,1) );
-            G                = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,2) );
-            H                = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,3) );
-            GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H);
-            Heps             = _mm256_mul_pd(vfeps,H);
-            Fp               = _mm256_add_pd(F,_mm256_mul_pd(vfeps,_mm256_add_pd(G,Heps)));
-            VV               = _mm256_add_pd(Y,_mm256_mul_pd(vfeps,Fp));
-            vvdw6            = _mm256_mul_pd(c6_00,VV);
-            FF               = _mm256_add_pd(Fp,_mm256_mul_pd(vfeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps))));
-            fvdw6            = _mm256_mul_pd(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            vfitab           = _mm_add_epi32(vfitab,ifour);
-            Y                = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,0) );
-            F                = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,1) );
-            G                = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,2) );
-            H                = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,3) );
-            GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H);
-            Heps             = _mm256_mul_pd(vfeps,H);
-            Fp               = _mm256_add_pd(F,_mm256_mul_pd(vfeps,_mm256_add_pd(G,Heps)));
-            VV               = _mm256_add_pd(Y,_mm256_mul_pd(vfeps,Fp));
-            vvdw12           = _mm256_mul_pd(c12_00,VV);
-            FF               = _mm256_add_pd(Fp,_mm256_mul_pd(vfeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps))));
-            fvdw12           = _mm256_mul_pd(c12_00,FF);
-            vvdw             = _mm256_add_pd(vvdw12,vvdw6);
-            fvdw             = _mm256_xor_pd(signbit,_mm256_mul_pd(_mm256_add_pd(fvdw6,fvdw12),_mm256_mul_pd(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _mm256_andnot_pd(dummy_mask,velec);
-            velecsum         = _mm256_add_pd(velecsum,velec);
-            vgb              = _mm256_andnot_pd(dummy_mask,vgb);
-            vgbsum           = _mm256_add_pd(vgbsum,vgb);
-            vvdw             = _mm256_andnot_pd(dummy_mask,vvdw);
-            vvdwsum          = _mm256_add_pd(vvdwsum,vvdw);
-
-            fscal            = _mm256_add_pd(felec,fvdw);
-
-            fscal            = _mm256_andnot_pd(dummy_mask,fscal);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm256_mul_pd(fscal,dx00);
-            ty               = _mm256_mul_pd(fscal,dy00);
-            tz               = _mm256_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm256_add_pd(fix0,tx);
-            fiy0             = _mm256_add_pd(fiy0,ty);
-            fiz0             = _mm256_add_pd(fiz0,tz);
-
-            fjptrA             = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
-            fjptrB             = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
-            fjptrC             = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
-            fjptrD             = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
-            gmx_mm256_decrement_1rvec_4ptr_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
-            /* Inner loop uses 92 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm256_update_iforce_1atom_swizzle_pd(fix0,fiy0,fiz0,
-                                                 f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_mm256_update_1pot_pd(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_mm256_update_1pot_pd(vgbsum,kernel_data->energygrp_polarization+ggid);
-        gmx_mm256_update_1pot_pd(vvdwsum,kernel_data->energygrp_vdw+ggid);
-        dvdasum = _mm256_mul_pd(dvdasum, _mm256_mul_pd(isai0,isai0));
-        gmx_mm256_update_1pot_pd(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 10 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*92);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_256_double
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_256_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or 
-     * just 0 for non-waters.
-     * Suffixes A,B,C,D refer to j loop unrolling done with AVX, e.g. for the four different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB,jnrC,jnrD;
-    int              jnrlistA,jnrlistB,jnrlistC,jnrlistD;
-    int              jnrlistE,jnrlistF,jnrlistG,jnrlistH;
-    int              j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    real             *fjptrA,*fjptrB,*fjptrC,*fjptrD;
-    real             scratch[4*DIM];
-    __m256d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    real *           vdwioffsetptr0;
-    __m256d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D;
-    __m256d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m256d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m256d          velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m256d          vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp;
-    __m256d          minushalf = _mm256_set1_pd(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    __m256d          rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    __m256d          one_sixth   = _mm256_set1_pd(1.0/6.0);
-    __m256d          one_twelfth = _mm256_set1_pd(1.0/12.0);
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m256d          rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m256d          dummy_mask,cutoff_mask;
-    __m128           tmpmask0,tmpmask1;
-    __m256d          signbit = _mm256_castsi256_pd( _mm256_set1_epi32(0x80000000) );
-    __m256d          one     = _mm256_set1_pd(1.0);
-    __m256d          two     = _mm256_set1_pd(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm256_set1_pd(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = _mm256_set1_pd(kernel_data->table_vdw->scale);
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm256_set1_pd(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm256_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = jnrC = jnrD = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-    j_coord_offsetC = 0;
-    j_coord_offsetD = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    for(iidx=0;iidx<4*DIM;iidx++)
-    {
-        scratch[iidx] = 0.0;
-    }
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm256_load_shift_and_1rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm256_setzero_pd();
-        fiy0             = _mm256_setzero_pd();
-        fiz0             = _mm256_setzero_pd();
-
-        /* Load parameters for i particles */
-        iq0              = _mm256_mul_pd(facel,_mm256_set1_pd(charge[inr+0]));
-        isai0            = _mm256_set1_pd(invsqrta[inr+0]);
-        vdwioffsetptr0   = vdwparam+2*nvdwtype*vdwtype[inr+0];
-
-        dvdasum          = _mm256_setzero_pd();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end && jjnr[jidx+3]>=0; jidx+=4)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            jnrC             = jjnr[jidx+2];
-            jnrD             = jjnr[jidx+3];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm256_load_1rvec_4ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
-                                                 x+j_coord_offsetC,x+j_coord_offsetD,
-                                                 &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm256_sub_pd(ix0,jx0);
-            dy00             = _mm256_sub_pd(iy0,jy0);
-            dz00             = _mm256_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm256_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = avx256_invsqrt_d(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm256_load_4real_swizzle_pd(charge+jnrA+0,charge+jnrB+0,
-                                                                 charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm256_load_4real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                                 invsqrta+jnrC+0,invsqrta+jnrD+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm256_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm256_mul_pd(iq0,jq0);
-            gmx_mm256_load_4pair_swizzle_pd(vdwioffsetptr0+vdwjidx0A,
-                                            vdwioffsetptr0+vdwjidx0B,
-                                            vdwioffsetptr0+vdwjidx0C,
-                                            vdwioffsetptr0+vdwjidx0D,
-                                            &c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _mm256_mul_pd(r00,vftabscale);
-            vfitab           = _mm256_cvttpd_epi32(rt);
-            vfeps            = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR));
-            vfitab           = _mm_slli_epi32(vfitab,3);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm256_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm256_xor_pd(signbit,_mm256_mul_pd(qq00,_mm256_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm256_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm256_mul_pd(r00,gbscale);
-            gbitab           = _mm256_cvttpd_epi32(rt);
-            gbeps            = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-            Y                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,1) );
-            G                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,2) );
-            H                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,3) );
-            GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H);
-            Heps             = _mm256_mul_pd(gbeps,H);
-            Fp               = _mm256_add_pd(F,_mm256_mul_pd(gbeps,_mm256_add_pd(G,Heps)));
-            VV               = _mm256_add_pd(Y,_mm256_mul_pd(gbeps,Fp));
-            vgb              = _mm256_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps))));
-            fgb              = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale));
-            dvdatmp          = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r00)));
-            dvdasum          = _mm256_add_pd(dvdasum,dvdatmp);
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            gmx_mm256_increment_4real_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,
-                                                 _mm256_mul_pd(dvdatmp,_mm256_mul_pd(isaj0,isaj0)));
-            velec            = _mm256_mul_pd(qq00,rinv00);
-            felec            = _mm256_mul_pd(_mm256_sub_pd(_mm256_mul_pd(velec,rinv00),fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,0) );
-            F                = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,1) );
-            G                = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,2) );
-            H                = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,3) );
-            GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H);
-            Heps             = _mm256_mul_pd(vfeps,H);
-            Fp               = _mm256_add_pd(F,_mm256_mul_pd(vfeps,_mm256_add_pd(G,Heps)));
-            FF               = _mm256_add_pd(Fp,_mm256_mul_pd(vfeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps))));
-            fvdw6            = _mm256_mul_pd(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            vfitab           = _mm_add_epi32(vfitab,ifour);
-            Y                = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,0) );
-            F                = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,1) );
-            G                = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,2) );
-            H                = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,3) );
-            GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H);
-            Heps             = _mm256_mul_pd(vfeps,H);
-            Fp               = _mm256_add_pd(F,_mm256_mul_pd(vfeps,_mm256_add_pd(G,Heps)));
-            FF               = _mm256_add_pd(Fp,_mm256_mul_pd(vfeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps))));
-            fvdw12           = _mm256_mul_pd(c12_00,FF);
-            fvdw             = _mm256_xor_pd(signbit,_mm256_mul_pd(_mm256_add_pd(fvdw6,fvdw12),_mm256_mul_pd(vftabscale,rinv00)));
-
-            fscal            = _mm256_add_pd(felec,fvdw);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm256_mul_pd(fscal,dx00);
-            ty               = _mm256_mul_pd(fscal,dy00);
-            tz               = _mm256_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm256_add_pd(fix0,tx);
-            fiy0             = _mm256_add_pd(fiy0,ty);
-            fiz0             = _mm256_add_pd(fiz0,tz);
-
-            fjptrA             = f+j_coord_offsetA;
-            fjptrB             = f+j_coord_offsetB;
-            fjptrC             = f+j_coord_offsetC;
-            fjptrD             = f+j_coord_offsetD;
-            gmx_mm256_decrement_1rvec_4ptr_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
-            /* Inner loop uses 81 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrlistA         = jjnr[jidx];
-            jnrlistB         = jjnr[jidx+1];
-            jnrlistC         = jjnr[jidx+2];
-            jnrlistD         = jjnr[jidx+3];
-            /* Sign of each element will be negative for non-real atoms.
-             * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
-             * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
-             */
-            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
-
-            tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
-            tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
-            dummy_mask = _mm256_castps_pd(gmx_mm256_set_m128(tmpmask1,tmpmask0));
-
-            jnrA       = (jnrlistA>=0) ? jnrlistA : 0;
-            jnrB       = (jnrlistB>=0) ? jnrlistB : 0;
-            jnrC       = (jnrlistC>=0) ? jnrlistC : 0;
-            jnrD       = (jnrlistD>=0) ? jnrlistD : 0;
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm256_load_1rvec_4ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
-                                                 x+j_coord_offsetC,x+j_coord_offsetD,
-                                                 &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm256_sub_pd(ix0,jx0);
-            dy00             = _mm256_sub_pd(iy0,jy0);
-            dz00             = _mm256_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm256_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = avx256_invsqrt_d(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm256_load_4real_swizzle_pd(charge+jnrA+0,charge+jnrB+0,
-                                                                 charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm256_load_4real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                                 invsqrta+jnrC+0,invsqrta+jnrD+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm256_mul_pd(rsq00,rinv00);
-            r00              = _mm256_andnot_pd(dummy_mask,r00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm256_mul_pd(iq0,jq0);
-            gmx_mm256_load_4pair_swizzle_pd(vdwioffsetptr0+vdwjidx0A,
-                                            vdwioffsetptr0+vdwjidx0B,
-                                            vdwioffsetptr0+vdwjidx0C,
-                                            vdwioffsetptr0+vdwjidx0D,
-                                            &c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _mm256_mul_pd(r00,vftabscale);
-            vfitab           = _mm256_cvttpd_epi32(rt);
-            vfeps            = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR));
-            vfitab           = _mm_slli_epi32(vfitab,3);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm256_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm256_xor_pd(signbit,_mm256_mul_pd(qq00,_mm256_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm256_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm256_mul_pd(r00,gbscale);
-            gbitab           = _mm256_cvttpd_epi32(rt);
-            gbeps            = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-            Y                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,1) );
-            G                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,2) );
-            H                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,3) );
-            GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H);
-            Heps             = _mm256_mul_pd(gbeps,H);
-            Fp               = _mm256_add_pd(F,_mm256_mul_pd(gbeps,_mm256_add_pd(G,Heps)));
-            VV               = _mm256_add_pd(Y,_mm256_mul_pd(gbeps,Fp));
-            vgb              = _mm256_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps))));
-            fgb              = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale));
-            dvdatmp          = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r00)));
-            dvdatmp          = _mm256_andnot_pd(dummy_mask,dvdatmp);
-            dvdasum          = _mm256_add_pd(dvdasum,dvdatmp);
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            gmx_mm256_increment_4real_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,
-                                                 _mm256_mul_pd(dvdatmp,_mm256_mul_pd(isaj0,isaj0)));
-            velec            = _mm256_mul_pd(qq00,rinv00);
-            felec            = _mm256_mul_pd(_mm256_sub_pd(_mm256_mul_pd(velec,rinv00),fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,0) );
-            F                = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,1) );
-            G                = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,2) );
-            H                = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,3) );
-            GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H);
-            Heps             = _mm256_mul_pd(vfeps,H);
-            Fp               = _mm256_add_pd(F,_mm256_mul_pd(vfeps,_mm256_add_pd(G,Heps)));
-            FF               = _mm256_add_pd(Fp,_mm256_mul_pd(vfeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps))));
-            fvdw6            = _mm256_mul_pd(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            vfitab           = _mm_add_epi32(vfitab,ifour);
-            Y                = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,0) );
-            F                = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,1) );
-            G                = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,2) );
-            H                = _mm256_load_pd( vftab + _mm_extract_epi32(vfitab,3) );
-            GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H);
-            Heps             = _mm256_mul_pd(vfeps,H);
-            Fp               = _mm256_add_pd(F,_mm256_mul_pd(vfeps,_mm256_add_pd(G,Heps)));
-            FF               = _mm256_add_pd(Fp,_mm256_mul_pd(vfeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps))));
-            fvdw12           = _mm256_mul_pd(c12_00,FF);
-            fvdw             = _mm256_xor_pd(signbit,_mm256_mul_pd(_mm256_add_pd(fvdw6,fvdw12),_mm256_mul_pd(vftabscale,rinv00)));
-
-            fscal            = _mm256_add_pd(felec,fvdw);
-
-            fscal            = _mm256_andnot_pd(dummy_mask,fscal);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm256_mul_pd(fscal,dx00);
-            ty               = _mm256_mul_pd(fscal,dy00);
-            tz               = _mm256_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm256_add_pd(fix0,tx);
-            fiy0             = _mm256_add_pd(fiy0,ty);
-            fiz0             = _mm256_add_pd(fiz0,tz);
-
-            fjptrA             = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
-            fjptrB             = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
-            fjptrC             = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
-            fjptrD             = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
-            gmx_mm256_decrement_1rvec_4ptr_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
-            /* Inner loop uses 82 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm256_update_iforce_1atom_swizzle_pd(fix0,fiy0,fiz0,
-                                                 f+i_coord_offset,fshift+i_shift_offset);
-
-        dvdasum = _mm256_mul_pd(dvdasum, _mm256_mul_pd(isai0,isai0));
-        gmx_mm256_update_1pot_pd(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*82);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_256_double.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_256_double.c
deleted file mode 100644
index 132b4e69b3..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_256_double.c
+++ /dev/null
@@ -1,870 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS avx_256_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_x86_avx_256_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_256_double
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_256_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or 
-     * just 0 for non-waters.
-     * Suffixes A,B,C,D refer to j loop unrolling done with AVX, e.g. for the four different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB,jnrC,jnrD;
-    int              jnrlistA,jnrlistB,jnrlistC,jnrlistD;
-    int              jnrlistE,jnrlistF,jnrlistG,jnrlistH;
-    int              j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    real             *fjptrA,*fjptrB,*fjptrC,*fjptrD;
-    real             scratch[4*DIM];
-    __m256d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    real *           vdwioffsetptr0;
-    __m256d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D;
-    __m256d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m256d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m256d          velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m256d          vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp;
-    __m256d          minushalf = _mm256_set1_pd(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    __m256d          rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    __m256d          one_sixth   = _mm256_set1_pd(1.0/6.0);
-    __m256d          one_twelfth = _mm256_set1_pd(1.0/12.0);
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m256d          rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m256d          dummy_mask,cutoff_mask;
-    __m128           tmpmask0,tmpmask1;
-    __m256d          signbit = _mm256_castsi256_pd( _mm256_set1_epi32(0x80000000) );
-    __m256d          one     = _mm256_set1_pd(1.0);
-    __m256d          two     = _mm256_set1_pd(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm256_set1_pd(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm256_set1_pd(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm256_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = jnrC = jnrD = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-    j_coord_offsetC = 0;
-    j_coord_offsetD = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    for(iidx=0;iidx<4*DIM;iidx++)
-    {
-        scratch[iidx] = 0.0;
-    }
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm256_load_shift_and_1rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm256_setzero_pd();
-        fiy0             = _mm256_setzero_pd();
-        fiz0             = _mm256_setzero_pd();
-
-        /* Load parameters for i particles */
-        iq0              = _mm256_mul_pd(facel,_mm256_set1_pd(charge[inr+0]));
-        isai0            = _mm256_set1_pd(invsqrta[inr+0]);
-        vdwioffsetptr0   = vdwparam+2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _mm256_setzero_pd();
-        vgbsum           = _mm256_setzero_pd();
-        vvdwsum          = _mm256_setzero_pd();
-        dvdasum          = _mm256_setzero_pd();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end && jjnr[jidx+3]>=0; jidx+=4)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            jnrC             = jjnr[jidx+2];
-            jnrD             = jjnr[jidx+3];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm256_load_1rvec_4ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
-                                                 x+j_coord_offsetC,x+j_coord_offsetD,
-                                                 &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm256_sub_pd(ix0,jx0);
-            dy00             = _mm256_sub_pd(iy0,jy0);
-            dz00             = _mm256_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm256_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = avx256_invsqrt_d(rsq00);
-
-            rinvsq00         = _mm256_mul_pd(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm256_load_4real_swizzle_pd(charge+jnrA+0,charge+jnrB+0,
-                                                                 charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm256_load_4real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                                 invsqrta+jnrC+0,invsqrta+jnrD+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm256_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm256_mul_pd(iq0,jq0);
-            gmx_mm256_load_4pair_swizzle_pd(vdwioffsetptr0+vdwjidx0A,
-                                            vdwioffsetptr0+vdwjidx0B,
-                                            vdwioffsetptr0+vdwjidx0C,
-                                            vdwioffsetptr0+vdwjidx0D,
-                                            &c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm256_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm256_xor_pd(signbit,_mm256_mul_pd(qq00,_mm256_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm256_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm256_mul_pd(r00,gbscale);
-            gbitab           = _mm256_cvttpd_epi32(rt);
-            gbeps            = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-            Y                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,1) );
-            G                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,2) );
-            H                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,3) );
-            GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H);
-            Heps             = _mm256_mul_pd(gbeps,H);
-            Fp               = _mm256_add_pd(F,_mm256_mul_pd(gbeps,_mm256_add_pd(G,Heps)));
-            VV               = _mm256_add_pd(Y,_mm256_mul_pd(gbeps,Fp));
-            vgb              = _mm256_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps))));
-            fgb              = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale));
-            dvdatmp          = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r00)));
-            dvdasum          = _mm256_add_pd(dvdasum,dvdatmp);
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            gmx_mm256_increment_4real_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,
-                                                 _mm256_mul_pd(dvdatmp,_mm256_mul_pd(isaj0,isaj0)));
-            velec            = _mm256_mul_pd(qq00,rinv00);
-            felec            = _mm256_mul_pd(_mm256_sub_pd(_mm256_mul_pd(velec,rinv00),fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _mm256_mul_pd(_mm256_mul_pd(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _mm256_mul_pd(c6_00,rinvsix);
-            vvdw12           = _mm256_mul_pd(c12_00,_mm256_mul_pd(rinvsix,rinvsix));
-            vvdw             = _mm256_sub_pd( _mm256_mul_pd(vvdw12,one_twelfth) , _mm256_mul_pd(vvdw6,one_sixth) );
-            fvdw             = _mm256_mul_pd(_mm256_sub_pd(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _mm256_add_pd(velecsum,velec);
-            vgbsum           = _mm256_add_pd(vgbsum,vgb);
-            vvdwsum          = _mm256_add_pd(vvdwsum,vvdw);
-
-            fscal            = _mm256_add_pd(felec,fvdw);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm256_mul_pd(fscal,dx00);
-            ty               = _mm256_mul_pd(fscal,dy00);
-            tz               = _mm256_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm256_add_pd(fix0,tx);
-            fiy0             = _mm256_add_pd(fiy0,ty);
-            fiz0             = _mm256_add_pd(fiz0,tz);
-
-            fjptrA             = f+j_coord_offsetA;
-            fjptrB             = f+j_coord_offsetB;
-            fjptrC             = f+j_coord_offsetC;
-            fjptrD             = f+j_coord_offsetD;
-            gmx_mm256_decrement_1rvec_4ptr_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
-            /* Inner loop uses 70 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrlistA         = jjnr[jidx];
-            jnrlistB         = jjnr[jidx+1];
-            jnrlistC         = jjnr[jidx+2];
-            jnrlistD         = jjnr[jidx+3];
-            /* Sign of each element will be negative for non-real atoms.
-             * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
-             * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
-             */
-            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
-
-            tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
-            tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
-            dummy_mask = _mm256_castps_pd(gmx_mm256_set_m128(tmpmask1,tmpmask0));
-
-            jnrA       = (jnrlistA>=0) ? jnrlistA : 0;
-            jnrB       = (jnrlistB>=0) ? jnrlistB : 0;
-            jnrC       = (jnrlistC>=0) ? jnrlistC : 0;
-            jnrD       = (jnrlistD>=0) ? jnrlistD : 0;
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm256_load_1rvec_4ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
-                                                 x+j_coord_offsetC,x+j_coord_offsetD,
-                                                 &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm256_sub_pd(ix0,jx0);
-            dy00             = _mm256_sub_pd(iy0,jy0);
-            dz00             = _mm256_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm256_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = avx256_invsqrt_d(rsq00);
-
-            rinvsq00         = _mm256_mul_pd(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm256_load_4real_swizzle_pd(charge+jnrA+0,charge+jnrB+0,
-                                                                 charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm256_load_4real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                                 invsqrta+jnrC+0,invsqrta+jnrD+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm256_mul_pd(rsq00,rinv00);
-            r00              = _mm256_andnot_pd(dummy_mask,r00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm256_mul_pd(iq0,jq0);
-            gmx_mm256_load_4pair_swizzle_pd(vdwioffsetptr0+vdwjidx0A,
-                                            vdwioffsetptr0+vdwjidx0B,
-                                            vdwioffsetptr0+vdwjidx0C,
-                                            vdwioffsetptr0+vdwjidx0D,
-                                            &c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm256_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm256_xor_pd(signbit,_mm256_mul_pd(qq00,_mm256_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm256_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm256_mul_pd(r00,gbscale);
-            gbitab           = _mm256_cvttpd_epi32(rt);
-            gbeps            = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-            Y                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,1) );
-            G                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,2) );
-            H                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,3) );
-            GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H);
-            Heps             = _mm256_mul_pd(gbeps,H);
-            Fp               = _mm256_add_pd(F,_mm256_mul_pd(gbeps,_mm256_add_pd(G,Heps)));
-            VV               = _mm256_add_pd(Y,_mm256_mul_pd(gbeps,Fp));
-            vgb              = _mm256_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps))));
-            fgb              = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale));
-            dvdatmp          = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r00)));
-            dvdatmp          = _mm256_andnot_pd(dummy_mask,dvdatmp);
-            dvdasum          = _mm256_add_pd(dvdasum,dvdatmp);
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            gmx_mm256_increment_4real_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,
-                                                 _mm256_mul_pd(dvdatmp,_mm256_mul_pd(isaj0,isaj0)));
-            velec            = _mm256_mul_pd(qq00,rinv00);
-            felec            = _mm256_mul_pd(_mm256_sub_pd(_mm256_mul_pd(velec,rinv00),fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _mm256_mul_pd(_mm256_mul_pd(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _mm256_mul_pd(c6_00,rinvsix);
-            vvdw12           = _mm256_mul_pd(c12_00,_mm256_mul_pd(rinvsix,rinvsix));
-            vvdw             = _mm256_sub_pd( _mm256_mul_pd(vvdw12,one_twelfth) , _mm256_mul_pd(vvdw6,one_sixth) );
-            fvdw             = _mm256_mul_pd(_mm256_sub_pd(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _mm256_andnot_pd(dummy_mask,velec);
-            velecsum         = _mm256_add_pd(velecsum,velec);
-            vgb              = _mm256_andnot_pd(dummy_mask,vgb);
-            vgbsum           = _mm256_add_pd(vgbsum,vgb);
-            vvdw             = _mm256_andnot_pd(dummy_mask,vvdw);
-            vvdwsum          = _mm256_add_pd(vvdwsum,vvdw);
-
-            fscal            = _mm256_add_pd(felec,fvdw);
-
-            fscal            = _mm256_andnot_pd(dummy_mask,fscal);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm256_mul_pd(fscal,dx00);
-            ty               = _mm256_mul_pd(fscal,dy00);
-            tz               = _mm256_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm256_add_pd(fix0,tx);
-            fiy0             = _mm256_add_pd(fiy0,ty);
-            fiz0             = _mm256_add_pd(fiz0,tz);
-
-            fjptrA             = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
-            fjptrB             = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
-            fjptrC             = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
-            fjptrD             = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
-            gmx_mm256_decrement_1rvec_4ptr_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
-            /* Inner loop uses 71 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm256_update_iforce_1atom_swizzle_pd(fix0,fiy0,fiz0,
-                                                 f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_mm256_update_1pot_pd(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_mm256_update_1pot_pd(vgbsum,kernel_data->energygrp_polarization+ggid);
-        gmx_mm256_update_1pot_pd(vvdwsum,kernel_data->energygrp_vdw+ggid);
-        dvdasum = _mm256_mul_pd(dvdasum, _mm256_mul_pd(isai0,isai0));
-        gmx_mm256_update_1pot_pd(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 10 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*71);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_256_double
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_256_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or 
-     * just 0 for non-waters.
-     * Suffixes A,B,C,D refer to j loop unrolling done with AVX, e.g. for the four different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB,jnrC,jnrD;
-    int              jnrlistA,jnrlistB,jnrlistC,jnrlistD;
-    int              jnrlistE,jnrlistF,jnrlistG,jnrlistH;
-    int              j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    real             *fjptrA,*fjptrB,*fjptrC,*fjptrD;
-    real             scratch[4*DIM];
-    __m256d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    real *           vdwioffsetptr0;
-    __m256d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D;
-    __m256d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m256d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m256d          velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m256d          vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp;
-    __m256d          minushalf = _mm256_set1_pd(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    __m256d          rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    __m256d          one_sixth   = _mm256_set1_pd(1.0/6.0);
-    __m256d          one_twelfth = _mm256_set1_pd(1.0/12.0);
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m256d          rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m256d          dummy_mask,cutoff_mask;
-    __m128           tmpmask0,tmpmask1;
-    __m256d          signbit = _mm256_castsi256_pd( _mm256_set1_epi32(0x80000000) );
-    __m256d          one     = _mm256_set1_pd(1.0);
-    __m256d          two     = _mm256_set1_pd(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm256_set1_pd(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm256_set1_pd(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm256_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = jnrC = jnrD = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-    j_coord_offsetC = 0;
-    j_coord_offsetD = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    for(iidx=0;iidx<4*DIM;iidx++)
-    {
-        scratch[iidx] = 0.0;
-    }
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm256_load_shift_and_1rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm256_setzero_pd();
-        fiy0             = _mm256_setzero_pd();
-        fiz0             = _mm256_setzero_pd();
-
-        /* Load parameters for i particles */
-        iq0              = _mm256_mul_pd(facel,_mm256_set1_pd(charge[inr+0]));
-        isai0            = _mm256_set1_pd(invsqrta[inr+0]);
-        vdwioffsetptr0   = vdwparam+2*nvdwtype*vdwtype[inr+0];
-
-        dvdasum          = _mm256_setzero_pd();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end && jjnr[jidx+3]>=0; jidx+=4)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            jnrC             = jjnr[jidx+2];
-            jnrD             = jjnr[jidx+3];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm256_load_1rvec_4ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
-                                                 x+j_coord_offsetC,x+j_coord_offsetD,
-                                                 &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm256_sub_pd(ix0,jx0);
-            dy00             = _mm256_sub_pd(iy0,jy0);
-            dz00             = _mm256_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm256_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = avx256_invsqrt_d(rsq00);
-
-            rinvsq00         = _mm256_mul_pd(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm256_load_4real_swizzle_pd(charge+jnrA+0,charge+jnrB+0,
-                                                                 charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm256_load_4real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                                 invsqrta+jnrC+0,invsqrta+jnrD+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm256_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm256_mul_pd(iq0,jq0);
-            gmx_mm256_load_4pair_swizzle_pd(vdwioffsetptr0+vdwjidx0A,
-                                            vdwioffsetptr0+vdwjidx0B,
-                                            vdwioffsetptr0+vdwjidx0C,
-                                            vdwioffsetptr0+vdwjidx0D,
-                                            &c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm256_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm256_xor_pd(signbit,_mm256_mul_pd(qq00,_mm256_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm256_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm256_mul_pd(r00,gbscale);
-            gbitab           = _mm256_cvttpd_epi32(rt);
-            gbeps            = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-            Y                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,1) );
-            G                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,2) );
-            H                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,3) );
-            GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H);
-            Heps             = _mm256_mul_pd(gbeps,H);
-            Fp               = _mm256_add_pd(F,_mm256_mul_pd(gbeps,_mm256_add_pd(G,Heps)));
-            VV               = _mm256_add_pd(Y,_mm256_mul_pd(gbeps,Fp));
-            vgb              = _mm256_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps))));
-            fgb              = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale));
-            dvdatmp          = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r00)));
-            dvdasum          = _mm256_add_pd(dvdasum,dvdatmp);
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            gmx_mm256_increment_4real_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,
-                                                 _mm256_mul_pd(dvdatmp,_mm256_mul_pd(isaj0,isaj0)));
-            velec            = _mm256_mul_pd(qq00,rinv00);
-            felec            = _mm256_mul_pd(_mm256_sub_pd(_mm256_mul_pd(velec,rinv00),fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _mm256_mul_pd(_mm256_mul_pd(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _mm256_mul_pd(_mm256_sub_pd(_mm256_mul_pd(c12_00,rinvsix),c6_00),_mm256_mul_pd(rinvsix,rinvsq00));
-
-            fscal            = _mm256_add_pd(felec,fvdw);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm256_mul_pd(fscal,dx00);
-            ty               = _mm256_mul_pd(fscal,dy00);
-            tz               = _mm256_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm256_add_pd(fix0,tx);
-            fiy0             = _mm256_add_pd(fiy0,ty);
-            fiz0             = _mm256_add_pd(fiz0,tz);
-
-            fjptrA             = f+j_coord_offsetA;
-            fjptrB             = f+j_coord_offsetB;
-            fjptrC             = f+j_coord_offsetC;
-            fjptrD             = f+j_coord_offsetD;
-            gmx_mm256_decrement_1rvec_4ptr_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
-            /* Inner loop uses 63 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrlistA         = jjnr[jidx];
-            jnrlistB         = jjnr[jidx+1];
-            jnrlistC         = jjnr[jidx+2];
-            jnrlistD         = jjnr[jidx+3];
-            /* Sign of each element will be negative for non-real atoms.
-             * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
-             * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
-             */
-            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
-
-            tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
-            tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
-            dummy_mask = _mm256_castps_pd(gmx_mm256_set_m128(tmpmask1,tmpmask0));
-
-            jnrA       = (jnrlistA>=0) ? jnrlistA : 0;
-            jnrB       = (jnrlistB>=0) ? jnrlistB : 0;
-            jnrC       = (jnrlistC>=0) ? jnrlistC : 0;
-            jnrD       = (jnrlistD>=0) ? jnrlistD : 0;
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm256_load_1rvec_4ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
-                                                 x+j_coord_offsetC,x+j_coord_offsetD,
-                                                 &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm256_sub_pd(ix0,jx0);
-            dy00             = _mm256_sub_pd(iy0,jy0);
-            dz00             = _mm256_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm256_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = avx256_invsqrt_d(rsq00);
-
-            rinvsq00         = _mm256_mul_pd(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm256_load_4real_swizzle_pd(charge+jnrA+0,charge+jnrB+0,
-                                                                 charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm256_load_4real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                                 invsqrta+jnrC+0,invsqrta+jnrD+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm256_mul_pd(rsq00,rinv00);
-            r00              = _mm256_andnot_pd(dummy_mask,r00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm256_mul_pd(iq0,jq0);
-            gmx_mm256_load_4pair_swizzle_pd(vdwioffsetptr0+vdwjidx0A,
-                                            vdwioffsetptr0+vdwjidx0B,
-                                            vdwioffsetptr0+vdwjidx0C,
-                                            vdwioffsetptr0+vdwjidx0D,
-                                            &c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm256_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm256_xor_pd(signbit,_mm256_mul_pd(qq00,_mm256_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm256_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm256_mul_pd(r00,gbscale);
-            gbitab           = _mm256_cvttpd_epi32(rt);
-            gbeps            = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-            Y                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,1) );
-            G                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,2) );
-            H                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,3) );
-            GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H);
-            Heps             = _mm256_mul_pd(gbeps,H);
-            Fp               = _mm256_add_pd(F,_mm256_mul_pd(gbeps,_mm256_add_pd(G,Heps)));
-            VV               = _mm256_add_pd(Y,_mm256_mul_pd(gbeps,Fp));
-            vgb              = _mm256_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps))));
-            fgb              = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale));
-            dvdatmp          = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r00)));
-            dvdatmp          = _mm256_andnot_pd(dummy_mask,dvdatmp);
-            dvdasum          = _mm256_add_pd(dvdasum,dvdatmp);
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            gmx_mm256_increment_4real_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,
-                                                 _mm256_mul_pd(dvdatmp,_mm256_mul_pd(isaj0,isaj0)));
-            velec            = _mm256_mul_pd(qq00,rinv00);
-            felec            = _mm256_mul_pd(_mm256_sub_pd(_mm256_mul_pd(velec,rinv00),fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _mm256_mul_pd(_mm256_mul_pd(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _mm256_mul_pd(_mm256_sub_pd(_mm256_mul_pd(c12_00,rinvsix),c6_00),_mm256_mul_pd(rinvsix,rinvsq00));
-
-            fscal            = _mm256_add_pd(felec,fvdw);
-
-            fscal            = _mm256_andnot_pd(dummy_mask,fscal);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm256_mul_pd(fscal,dx00);
-            ty               = _mm256_mul_pd(fscal,dy00);
-            tz               = _mm256_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm256_add_pd(fix0,tx);
-            fiy0             = _mm256_add_pd(fiy0,ty);
-            fiz0             = _mm256_add_pd(fiz0,tz);
-
-            fjptrA             = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
-            fjptrB             = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
-            fjptrC             = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
-            fjptrD             = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
-            gmx_mm256_decrement_1rvec_4ptr_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
-            /* Inner loop uses 64 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm256_update_iforce_1atom_swizzle_pd(fix0,fiy0,fiz0,
-                                                 f+i_coord_offset,fshift+i_shift_offset);
-
-        dvdasum = _mm256_mul_pd(dvdasum, _mm256_mul_pd(isai0,isai0));
-        gmx_mm256_update_1pot_pd(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*64);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_256_double.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_256_double.c
deleted file mode 100644
index a64785a563..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_256_double.c
+++ /dev/null
@@ -1,775 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS avx_256_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_x86_avx_256_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_256_double
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_256_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or 
-     * just 0 for non-waters.
-     * Suffixes A,B,C,D refer to j loop unrolling done with AVX, e.g. for the four different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB,jnrC,jnrD;
-    int              jnrlistA,jnrlistB,jnrlistC,jnrlistD;
-    int              jnrlistE,jnrlistF,jnrlistG,jnrlistH;
-    int              j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    real             *fjptrA,*fjptrB,*fjptrC,*fjptrD;
-    real             scratch[4*DIM];
-    __m256d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    real *           vdwioffsetptr0;
-    __m256d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D;
-    __m256d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m256d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m256d          velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m256d          vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp;
-    __m256d          minushalf = _mm256_set1_pd(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m256d          rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m256d          dummy_mask,cutoff_mask;
-    __m128           tmpmask0,tmpmask1;
-    __m256d          signbit = _mm256_castsi256_pd( _mm256_set1_epi32(0x80000000) );
-    __m256d          one     = _mm256_set1_pd(1.0);
-    __m256d          two     = _mm256_set1_pd(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm256_set1_pd(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm256_set1_pd(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm256_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = jnrC = jnrD = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-    j_coord_offsetC = 0;
-    j_coord_offsetD = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    for(iidx=0;iidx<4*DIM;iidx++)
-    {
-        scratch[iidx] = 0.0;
-    }
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm256_load_shift_and_1rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm256_setzero_pd();
-        fiy0             = _mm256_setzero_pd();
-        fiz0             = _mm256_setzero_pd();
-
-        /* Load parameters for i particles */
-        iq0              = _mm256_mul_pd(facel,_mm256_set1_pd(charge[inr+0]));
-        isai0            = _mm256_set1_pd(invsqrta[inr+0]);
-
-        /* Reset potential sums */
-        velecsum         = _mm256_setzero_pd();
-        vgbsum           = _mm256_setzero_pd();
-        dvdasum          = _mm256_setzero_pd();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end && jjnr[jidx+3]>=0; jidx+=4)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            jnrC             = jjnr[jidx+2];
-            jnrD             = jjnr[jidx+3];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm256_load_1rvec_4ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
-                                                 x+j_coord_offsetC,x+j_coord_offsetD,
-                                                 &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm256_sub_pd(ix0,jx0);
-            dy00             = _mm256_sub_pd(iy0,jy0);
-            dz00             = _mm256_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm256_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = avx256_invsqrt_d(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm256_load_4real_swizzle_pd(charge+jnrA+0,charge+jnrB+0,
-                                                                 charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm256_load_4real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                                 invsqrta+jnrC+0,invsqrta+jnrD+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm256_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm256_mul_pd(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm256_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm256_xor_pd(signbit,_mm256_mul_pd(qq00,_mm256_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm256_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm256_mul_pd(r00,gbscale);
-            gbitab           = _mm256_cvttpd_epi32(rt);
-            gbeps            = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-            Y                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,1) );
-            G                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,2) );
-            H                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,3) );
-            GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H);
-            Heps             = _mm256_mul_pd(gbeps,H);
-            Fp               = _mm256_add_pd(F,_mm256_mul_pd(gbeps,_mm256_add_pd(G,Heps)));
-            VV               = _mm256_add_pd(Y,_mm256_mul_pd(gbeps,Fp));
-            vgb              = _mm256_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps))));
-            fgb              = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale));
-            dvdatmp          = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r00)));
-            dvdasum          = _mm256_add_pd(dvdasum,dvdatmp);
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            gmx_mm256_increment_4real_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,
-                                                 _mm256_mul_pd(dvdatmp,_mm256_mul_pd(isaj0,isaj0)));
-            velec            = _mm256_mul_pd(qq00,rinv00);
-            felec            = _mm256_mul_pd(_mm256_sub_pd(_mm256_mul_pd(velec,rinv00),fgb),rinv00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _mm256_add_pd(velecsum,velec);
-            vgbsum           = _mm256_add_pd(vgbsum,vgb);
-
-            fscal            = felec;
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm256_mul_pd(fscal,dx00);
-            ty               = _mm256_mul_pd(fscal,dy00);
-            tz               = _mm256_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm256_add_pd(fix0,tx);
-            fiy0             = _mm256_add_pd(fiy0,ty);
-            fiz0             = _mm256_add_pd(fiz0,tz);
-
-            fjptrA             = f+j_coord_offsetA;
-            fjptrB             = f+j_coord_offsetB;
-            fjptrC             = f+j_coord_offsetC;
-            fjptrD             = f+j_coord_offsetD;
-            gmx_mm256_decrement_1rvec_4ptr_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
-            /* Inner loop uses 57 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrlistA         = jjnr[jidx];
-            jnrlistB         = jjnr[jidx+1];
-            jnrlistC         = jjnr[jidx+2];
-            jnrlistD         = jjnr[jidx+3];
-            /* Sign of each element will be negative for non-real atoms.
-             * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
-             * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
-             */
-            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
-
-            tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
-            tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
-            dummy_mask = _mm256_castps_pd(gmx_mm256_set_m128(tmpmask1,tmpmask0));
-
-            jnrA       = (jnrlistA>=0) ? jnrlistA : 0;
-            jnrB       = (jnrlistB>=0) ? jnrlistB : 0;
-            jnrC       = (jnrlistC>=0) ? jnrlistC : 0;
-            jnrD       = (jnrlistD>=0) ? jnrlistD : 0;
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm256_load_1rvec_4ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
-                                                 x+j_coord_offsetC,x+j_coord_offsetD,
-                                                 &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm256_sub_pd(ix0,jx0);
-            dy00             = _mm256_sub_pd(iy0,jy0);
-            dz00             = _mm256_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm256_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = avx256_invsqrt_d(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm256_load_4real_swizzle_pd(charge+jnrA+0,charge+jnrB+0,
-                                                                 charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm256_load_4real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                                 invsqrta+jnrC+0,invsqrta+jnrD+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm256_mul_pd(rsq00,rinv00);
-            r00              = _mm256_andnot_pd(dummy_mask,r00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm256_mul_pd(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm256_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm256_xor_pd(signbit,_mm256_mul_pd(qq00,_mm256_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm256_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm256_mul_pd(r00,gbscale);
-            gbitab           = _mm256_cvttpd_epi32(rt);
-            gbeps            = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-            Y                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,1) );
-            G                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,2) );
-            H                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,3) );
-            GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H);
-            Heps             = _mm256_mul_pd(gbeps,H);
-            Fp               = _mm256_add_pd(F,_mm256_mul_pd(gbeps,_mm256_add_pd(G,Heps)));
-            VV               = _mm256_add_pd(Y,_mm256_mul_pd(gbeps,Fp));
-            vgb              = _mm256_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps))));
-            fgb              = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale));
-            dvdatmp          = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r00)));
-            dvdatmp          = _mm256_andnot_pd(dummy_mask,dvdatmp);
-            dvdasum          = _mm256_add_pd(dvdasum,dvdatmp);
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            gmx_mm256_increment_4real_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,
-                                                 _mm256_mul_pd(dvdatmp,_mm256_mul_pd(isaj0,isaj0)));
-            velec            = _mm256_mul_pd(qq00,rinv00);
-            felec            = _mm256_mul_pd(_mm256_sub_pd(_mm256_mul_pd(velec,rinv00),fgb),rinv00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _mm256_andnot_pd(dummy_mask,velec);
-            velecsum         = _mm256_add_pd(velecsum,velec);
-            vgb              = _mm256_andnot_pd(dummy_mask,vgb);
-            vgbsum           = _mm256_add_pd(vgbsum,vgb);
-
-            fscal            = felec;
-
-            fscal            = _mm256_andnot_pd(dummy_mask,fscal);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm256_mul_pd(fscal,dx00);
-            ty               = _mm256_mul_pd(fscal,dy00);
-            tz               = _mm256_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm256_add_pd(fix0,tx);
-            fiy0             = _mm256_add_pd(fiy0,ty);
-            fiz0             = _mm256_add_pd(fiz0,tz);
-
-            fjptrA             = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
-            fjptrB             = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
-            fjptrC             = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
-            fjptrD             = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
-            gmx_mm256_decrement_1rvec_4ptr_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
-            /* Inner loop uses 58 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm256_update_iforce_1atom_swizzle_pd(fix0,fiy0,fiz0,
-                                                 f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_mm256_update_1pot_pd(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_mm256_update_1pot_pd(vgbsum,kernel_data->energygrp_polarization+ggid);
-        dvdasum = _mm256_mul_pd(dvdasum, _mm256_mul_pd(isai0,isai0));
-        gmx_mm256_update_1pot_pd(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 9 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*9 + inneriter*58);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_256_double
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_256_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or 
-     * just 0 for non-waters.
-     * Suffixes A,B,C,D refer to j loop unrolling done with AVX, e.g. for the four different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB,jnrC,jnrD;
-    int              jnrlistA,jnrlistB,jnrlistC,jnrlistD;
-    int              jnrlistE,jnrlistF,jnrlistG,jnrlistH;
-    int              j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    real             *fjptrA,*fjptrB,*fjptrC,*fjptrD;
-    real             scratch[4*DIM];
-    __m256d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    real *           vdwioffsetptr0;
-    __m256d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D;
-    __m256d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m256d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m256d          velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m256d          vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp;
-    __m256d          minushalf = _mm256_set1_pd(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m256d          rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m256d          dummy_mask,cutoff_mask;
-    __m128           tmpmask0,tmpmask1;
-    __m256d          signbit = _mm256_castsi256_pd( _mm256_set1_epi32(0x80000000) );
-    __m256d          one     = _mm256_set1_pd(1.0);
-    __m256d          two     = _mm256_set1_pd(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm256_set1_pd(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm256_set1_pd(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm256_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = jnrC = jnrD = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-    j_coord_offsetC = 0;
-    j_coord_offsetD = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    for(iidx=0;iidx<4*DIM;iidx++)
-    {
-        scratch[iidx] = 0.0;
-    }
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm256_load_shift_and_1rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm256_setzero_pd();
-        fiy0             = _mm256_setzero_pd();
-        fiz0             = _mm256_setzero_pd();
-
-        /* Load parameters for i particles */
-        iq0              = _mm256_mul_pd(facel,_mm256_set1_pd(charge[inr+0]));
-        isai0            = _mm256_set1_pd(invsqrta[inr+0]);
-
-        dvdasum          = _mm256_setzero_pd();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end && jjnr[jidx+3]>=0; jidx+=4)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            jnrC             = jjnr[jidx+2];
-            jnrD             = jjnr[jidx+3];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm256_load_1rvec_4ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
-                                                 x+j_coord_offsetC,x+j_coord_offsetD,
-                                                 &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm256_sub_pd(ix0,jx0);
-            dy00             = _mm256_sub_pd(iy0,jy0);
-            dz00             = _mm256_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm256_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = avx256_invsqrt_d(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm256_load_4real_swizzle_pd(charge+jnrA+0,charge+jnrB+0,
-                                                                 charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm256_load_4real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                                 invsqrta+jnrC+0,invsqrta+jnrD+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm256_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm256_mul_pd(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm256_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm256_xor_pd(signbit,_mm256_mul_pd(qq00,_mm256_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm256_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm256_mul_pd(r00,gbscale);
-            gbitab           = _mm256_cvttpd_epi32(rt);
-            gbeps            = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-            Y                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,1) );
-            G                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,2) );
-            H                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,3) );
-            GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H);
-            Heps             = _mm256_mul_pd(gbeps,H);
-            Fp               = _mm256_add_pd(F,_mm256_mul_pd(gbeps,_mm256_add_pd(G,Heps)));
-            VV               = _mm256_add_pd(Y,_mm256_mul_pd(gbeps,Fp));
-            vgb              = _mm256_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps))));
-            fgb              = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale));
-            dvdatmp          = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r00)));
-            dvdasum          = _mm256_add_pd(dvdasum,dvdatmp);
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            gmx_mm256_increment_4real_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,
-                                                 _mm256_mul_pd(dvdatmp,_mm256_mul_pd(isaj0,isaj0)));
-            velec            = _mm256_mul_pd(qq00,rinv00);
-            felec            = _mm256_mul_pd(_mm256_sub_pd(_mm256_mul_pd(velec,rinv00),fgb),rinv00);
-
-            fscal            = felec;
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm256_mul_pd(fscal,dx00);
-            ty               = _mm256_mul_pd(fscal,dy00);
-            tz               = _mm256_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm256_add_pd(fix0,tx);
-            fiy0             = _mm256_add_pd(fiy0,ty);
-            fiz0             = _mm256_add_pd(fiz0,tz);
-
-            fjptrA             = f+j_coord_offsetA;
-            fjptrB             = f+j_coord_offsetB;
-            fjptrC             = f+j_coord_offsetC;
-            fjptrD             = f+j_coord_offsetD;
-            gmx_mm256_decrement_1rvec_4ptr_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
-            /* Inner loop uses 55 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrlistA         = jjnr[jidx];
-            jnrlistB         = jjnr[jidx+1];
-            jnrlistC         = jjnr[jidx+2];
-            jnrlistD         = jjnr[jidx+3];
-            /* Sign of each element will be negative for non-real atoms.
-             * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
-             * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
-             */
-            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
-
-            tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
-            tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
-            dummy_mask = _mm256_castps_pd(gmx_mm256_set_m128(tmpmask1,tmpmask0));
-
-            jnrA       = (jnrlistA>=0) ? jnrlistA : 0;
-            jnrB       = (jnrlistB>=0) ? jnrlistB : 0;
-            jnrC       = (jnrlistC>=0) ? jnrlistC : 0;
-            jnrD       = (jnrlistD>=0) ? jnrlistD : 0;
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm256_load_1rvec_4ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
-                                                 x+j_coord_offsetC,x+j_coord_offsetD,
-                                                 &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm256_sub_pd(ix0,jx0);
-            dy00             = _mm256_sub_pd(iy0,jy0);
-            dz00             = _mm256_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm256_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = avx256_invsqrt_d(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm256_load_4real_swizzle_pd(charge+jnrA+0,charge+jnrB+0,
-                                                                 charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm256_load_4real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                                 invsqrta+jnrC+0,invsqrta+jnrD+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm256_mul_pd(rsq00,rinv00);
-            r00              = _mm256_andnot_pd(dummy_mask,r00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm256_mul_pd(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm256_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm256_xor_pd(signbit,_mm256_mul_pd(qq00,_mm256_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm256_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm256_mul_pd(r00,gbscale);
-            gbitab           = _mm256_cvttpd_epi32(rt);
-            gbeps            = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-            Y                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,1) );
-            G                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,2) );
-            H                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,3) );
-            GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H);
-            Heps             = _mm256_mul_pd(gbeps,H);
-            Fp               = _mm256_add_pd(F,_mm256_mul_pd(gbeps,_mm256_add_pd(G,Heps)));
-            VV               = _mm256_add_pd(Y,_mm256_mul_pd(gbeps,Fp));
-            vgb              = _mm256_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps))));
-            fgb              = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale));
-            dvdatmp          = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r00)));
-            dvdatmp          = _mm256_andnot_pd(dummy_mask,dvdatmp);
-            dvdasum          = _mm256_add_pd(dvdasum,dvdatmp);
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            gmx_mm256_increment_4real_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,
-                                                 _mm256_mul_pd(dvdatmp,_mm256_mul_pd(isaj0,isaj0)));
-            velec            = _mm256_mul_pd(qq00,rinv00);
-            felec            = _mm256_mul_pd(_mm256_sub_pd(_mm256_mul_pd(velec,rinv00),fgb),rinv00);
-
-            fscal            = felec;
-
-            fscal            = _mm256_andnot_pd(dummy_mask,fscal);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm256_mul_pd(fscal,dx00);
-            ty               = _mm256_mul_pd(fscal,dy00);
-            tz               = _mm256_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm256_add_pd(fix0,tx);
-            fiy0             = _mm256_add_pd(fiy0,ty);
-            fiz0             = _mm256_add_pd(fiz0,tz);
-
-            fjptrA             = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
-            fjptrB             = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
-            fjptrC             = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
-            fjptrD             = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
-            gmx_mm256_decrement_1rvec_4ptr_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
-            /* Inner loop uses 56 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm256_update_iforce_1atom_swizzle_pd(fix0,fiy0,fiz0,
-                                                 f+i_coord_offset,fshift+i_shift_offset);
-
-        dvdasum = _mm256_mul_pd(dvdasum, _mm256_mul_pd(isai0,isai0));
-        gmx_mm256_update_1pot_pd(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*7 + inneriter*56);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_avx_256_double.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_avx_256_double.c
index 9265074e27..b695cfff25 100644
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_avx_256_double.c
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_avx_256_double.c
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -51,6 +51,36 @@ nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_avx_256_double;
 nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_256_double;
 nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_256_double;
 nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_256_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_256_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_256_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_256_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_256_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_256_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_256_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_256_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_256_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_256_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_256_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_256_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_256_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_256_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_256_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_256_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_256_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_256_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_256_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_256_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_256_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_256_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_256_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_256_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_256_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_256_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_256_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_256_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_256_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_256_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_256_double;
 nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_avx_256_double;
 nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_avx_256_double;
 nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_avx_256_double;
@@ -141,72 +171,6 @@ nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_avx_256_double;
 nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_256_double;
 nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_256_double;
 nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_256_double;
-nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_256_double;
-nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_256_double;
 nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_256_double;
 nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_256_double;
 nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_256_double;
@@ -277,6 +241,36 @@ nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_256_double;
 nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_256_double;
 nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_256_double;
 nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_256_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_256_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_256_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_256_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_256_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_256_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_256_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_256_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_256_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_256_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_256_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_256_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_256_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_256_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_256_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_256_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_256_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_256_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_256_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_256_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_256_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_256_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_256_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_256_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_256_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_256_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_256_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_256_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_256_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_256_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_256_double;
 
 
 nb_kernel_info_t
@@ -294,6 +288,36 @@ nb_kernel_info_t
     { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_256_double, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_256_double", "avx_256_double", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_256_double", "avx_256_double", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
     { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_256_double, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_256_double", "avx_256_double", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
     { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_avx_256_double", "avx_256_double", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "PotentialAndForce" },
     { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_avx_256_double, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_avx_256_double", "avx_256_double", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_avx_256_double", "avx_256_double", "Ewald", "None", "LJEwald", "None", "Water3Particle", "", "PotentialAndForce" },
@@ -384,72 +408,6 @@ nb_kernel_info_t
     { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_256_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_256_double", "avx_256_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "Force" },
     { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_256_double", "avx_256_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "PotentialAndForce" },
     { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_256_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_256_double", "avx_256_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_256_double", "avx_256_double", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_256_double, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_256_double", "avx_256_double", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_256_double", "avx_256_double", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_256_double, "nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_256_double", "avx_256_double", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_256_double", "avx_256_double", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_256_double, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_256_double", "avx_256_double", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
     { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_256_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
@@ -519,7 +477,37 @@ nb_kernel_info_t
     { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_256_double", "avx_256_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
     { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_256_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_256_double", "avx_256_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
     { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_256_double", "avx_256_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_256_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_256_double", "avx_256_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }
+    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_256_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_256_double", "avx_256_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }
 };
 
 int
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_template_avx_256_double.pre b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_template_avx_256_double.pre
index a25ec0e390..f0197c104d 100644
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_template_avx_256_double.pre
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_template_avx_256_double.pre
@@ -2,7 +2,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -135,12 +135,6 @@ void
     __m256d          velec,felec,velecsum,facel,crf,krf,krf2;
     real             *charge;
     /* #endif */
-    /* #if 'GeneralizedBorn' in KERNEL_ELEC */
-    __m128i          gbitab;
-    __m256d          vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp;
-    __m256d          minushalf = _mm256_set1_pd(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    /* #endif */
     /* #if KERNEL_VDW != 'None' */
     int              nvdwtype;
     __m256d          rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
@@ -149,7 +143,7 @@ void
     __m256d          one_sixth   = _mm256_set1_pd(1.0/6.0);
     __m256d          one_twelfth = _mm256_set1_pd(1.0/12.0);
     /* #endif */
-    /* #if 'Table' in KERNEL_ELEC or 'GeneralizedBorn' in KERNEL_ELEC or 'Table' in KERNEL_VDW */
+    /* #if 'Table' in KERNEL_ELEC or 'Table' in KERNEL_VDW */
     __m128i          vfitab;
     __m128i          ifour       = _mm_set1_epi32(4);
     __m256d          rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
@@ -239,14 +233,6 @@ void
      /*     #endif */
     /* #endif */
 
-    /* #if KERNEL_ELEC=='GeneralizedBorn' */
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm256_set1_pd(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm256_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-    /* #endif */
-
     /* #if 'Water' in GEOMETRY_I */
     /* Setup water-specific parameters */
     inr              = nlist->iinr[0];
@@ -383,9 +369,6 @@ void
         /*     #for I in PARTICLES_ELEC_I */
         iq{I}              = _mm256_mul_pd(facel,_mm256_set1_pd(charge[inr+{I}]));
         /*         #define OUTERFLOPS OUTERFLOPS+1 */
-        /*         #if KERNEL_ELEC=='GeneralizedBorn' */
-        isai{I}            = _mm256_set1_pd(invsqrta[inr+{I}]);
-        /*         #endif */
         /*     #endfor */
         /*     #for I in PARTICLES_VDW_I */
         vdwioffsetptr{I}   = vdwparam+2*nvdwtype*vdwtype[inr+{I}];
@@ -400,16 +383,10 @@ void
         /*     #if KERNEL_ELEC != 'None' */
         velecsum         = _mm256_setzero_pd();
         /*     #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC */
-        vgbsum           = _mm256_setzero_pd();
-        /*     #endif */
         /*     #if KERNEL_VDW != 'None' */
         vvdwsum          = _mm256_setzero_pd();
         /*     #endif */
         /* #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */
-        dvdasum          = _mm256_setzero_pd();
-        /*     #endif */
 
         /* #for ROUND in ['Loop','Epilogue'] */
 
@@ -519,10 +496,6 @@ void
             /*     #for J in PARTICLES_ELEC_J */
             jq{J}              = gmx_mm256_load_4real_swizzle_pd(charge+jnrA+{J},charge+jnrB+{J},
                                                                  charge+jnrC+{J},charge+jnrD+{J});
-            /*         #if KERNEL_ELEC=='GeneralizedBorn' */
-            isaj{J}            = gmx_mm256_load_4real_swizzle_pd(invsqrta+jnrA+{J},invsqrta+jnrB+{J},
-                                                                 invsqrta+jnrC+{J},invsqrta+jnrD+{J});
-            /*         #endif */
             /*     #endfor */
             /*     #for J in PARTICLES_VDW_J */
             vdwjidx{J}A        = 2*vdwtype[jnrA+{J}];
@@ -632,63 +605,6 @@ void
             /*                 #define INNERFLOPS INNERFLOPS+3 */
             /*             #endif */
 
-            /*         #elif KERNEL_ELEC=='GeneralizedBorn' */
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm256_mul_pd(isai{I},isaj{J});
-            gbqqfactor       = _mm256_xor_pd(signbit,_mm256_mul_pd(qq{I}{J},_mm256_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm256_mul_pd(isaprod,gbtabscale);
-            /*             #define INNERFLOPS INNERFLOPS+5 */
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm256_mul_pd(r{I}{J},gbscale);
-            gbitab           = _mm256_cvttpd_epi32(rt);
-            gbeps            = _mm256_sub_pd(rt,_mm256_round_pd(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-            Y                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,0) );
-            F                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,1) );
-            G                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,2) );
-            H                = _mm256_load_pd( gbtab + _mm_extract_epi32(gbitab,3) );
-            GMX_MM256_FULLTRANSPOSE4_PD(Y,F,G,H);
-            Heps             = _mm256_mul_pd(gbeps,H);
-            Fp               = _mm256_add_pd(F,_mm256_mul_pd(gbeps,_mm256_add_pd(G,Heps)));
-            VV               = _mm256_add_pd(Y,_mm256_mul_pd(gbeps,Fp));
-            vgb              = _mm256_mul_pd(gbqqfactor,VV);
-            /*             #define INNERFLOPS INNERFLOPS+10 */
-
-            /*             #if 'Force' in KERNEL_VF */
-            FF               = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps))));
-            fgb              = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale));
-            dvdatmp          = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r{I}{J})));
-            /*                 #if ROUND == 'Epilogue' */
-            dvdatmp          = _mm256_andnot_pd(dummy_mask,dvdatmp);
-            /*                 #endif */
-            dvdasum          = _mm256_add_pd(dvdasum,dvdatmp);
-            /*                 #if ROUND == 'Loop' */
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            /*                 #else */
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            /*                 #endif */
-            gmx_mm256_increment_4real_swizzle_pd(fjptrA,fjptrB,fjptrC,fjptrD,
-                                                 _mm256_mul_pd(dvdatmp,_mm256_mul_pd(isaj{J},isaj{J})));
-            /*                 #define INNERFLOPS INNERFLOPS+12 */
-            /*             #endif */
-            velec            = _mm256_mul_pd(qq{I}{J},rinv{I}{J});
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #if 'Force' in KERNEL_VF */
-            felec            = _mm256_mul_pd(_mm256_sub_pd(_mm256_mul_pd(velec,rinv{I}{J}),fgb),rinv{I}{J});
-            /*                 #define INNERFLOPS INNERFLOPS+3 */
-            /*             #endif */
-
             /*         #elif KERNEL_ELEC=='Ewald' */
             /* EWALD ELECTROSTATICS */
 
@@ -935,17 +851,6 @@ void
             /*             #endif */
             velecsum         = _mm256_add_pd(velecsum,velec);
             /*             #define INNERFLOPS INNERFLOPS+1 */
-            /*             #if KERNEL_ELEC=='GeneralizedBorn' */
-            /*             #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */
-            vgb              = _mm256_and_pd(vgb,cutoff_mask);
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #endif                                       */
-            /*             #if ROUND == 'Epilogue' */
-            vgb              = _mm256_andnot_pd(dummy_mask,vgb);
-            /*             #endif */
-            vgbsum           = _mm256_add_pd(vgbsum,vgb);
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #endif */
             /*         #endif */
             /*         #if 'vdw' in INTERACTION_FLAGS[I][J] */
             /*     ## Note special check for TIP4P-TIP4P. Since we are cutting of all hydrogen interactions we also cut the LJ-only O-O interaction */
@@ -1098,19 +1003,11 @@ void
         gmx_mm256_update_1pot_pd(velecsum,kernel_data->energygrp_elec+ggid);
         /*         #define OUTERFLOPS OUTERFLOPS+1 */
         /*     #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC */
-        gmx_mm256_update_1pot_pd(vgbsum,kernel_data->energygrp_polarization+ggid);
-        /*         #define OUTERFLOPS OUTERFLOPS+1 */
-        /*     #endif */
         /*     #if KERNEL_VDW != 'None' */
         gmx_mm256_update_1pot_pd(vvdwsum,kernel_data->energygrp_vdw+ggid);
         /*         #define OUTERFLOPS OUTERFLOPS+1 */
         /*     #endif */
         /* #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */
-        dvdasum = _mm256_mul_pd(dvdasum, _mm256_mul_pd(isai{I},isai{I}));
-        gmx_mm256_update_1pot_pd(dvdasum,dvda+inr);
-        /*     #endif */
 
         /* Increment number of inner iterations */
         inneriter                  += j_index_end - j_index_start;
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/make_nb_kernel_avx_256_single.py b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/make_nb_kernel_avx_256_single.py
index b44938008e..dbb439ce39 100755
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/make_nb_kernel_avx_256_single.py
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/make_nb_kernel_avx_256_single.py
@@ -2,7 +2,7 @@
 #
 # This file is part of the GROMACS molecular simulation package.
 #
-# Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
+# Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 # and including many others, as listed in the AUTHORS file in the
 # top-level source directory and at http://www.gromacs.org.
@@ -105,7 +105,6 @@ ElectrostaticsList = {
     'None'                    : [],
     'Coulomb'                 : ['rinv','rinvsq'],
     'ReactionField'           : ['rinv','rinvsq'],
-    'GeneralizedBorn'         : ['rinv','r'],
     'CubicSplineTable'        : ['rinv','r','table'],
     'Ewald'                   : ['rinv','rinvsq','r'],
 }
@@ -190,7 +189,6 @@ Abbreviation = {
     'Coulomb'                 : 'Coul',
     'Ewald'                   : 'Ew',
     'ReactionField'           : 'RF',
-    'GeneralizedBorn'         : 'GB',
     'CubicSplineTable'        : 'CSTab',
     'LennardJones'            : 'LJ',
     'Buckingham'              : 'Bham',
@@ -269,7 +267,7 @@ def KeepKernel(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,Kernel
         return 0
 
     # No need for LJ-only water optimization, or water optimization with implicit solvent.
-    if('Water' in KernelGeom[0] and (KernelElec=='None' or 'GeneralizedBorn' in KernelElec)):
+    if('Water' in KernelGeom[0] and KernelElec=='None'):
         return 0
 
     # Non-matching table settings are pointless
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_256_single.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_256_single.c
deleted file mode 100644
index 7eb97eec87..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_256_single.c
+++ /dev/null
@@ -1,1192 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS avx_256_single kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_x86_avx_256_single.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_256_single
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_256_single
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or 
-     * just 0 for non-waters.
-     * Suffixes A,B,C,D,E,F,G,H refer to j loop unrolling done with AVX, e.g. for the eight different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB,jnrC,jnrD;
-    int              jnrE,jnrF,jnrG,jnrH;
-    int              jnrlistA,jnrlistB,jnrlistC,jnrlistD;
-    int              jnrlistE,jnrlistF,jnrlistG,jnrlistH;
-    int              j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD;
-    int              j_coord_offsetE,j_coord_offsetF,j_coord_offsetG,j_coord_offsetH;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    real             *fjptrA,*fjptrB,*fjptrC,*fjptrD,*fjptrE,*fjptrF,*fjptrG,*fjptrH;
-    real             scratch[4*DIM];
-    __m256           tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    real *           vdwioffsetptr0;
-    __m256           ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D,vdwjidx0E,vdwjidx0F,vdwjidx0G,vdwjidx0H;
-    __m256           jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m256           dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m256           velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m256i          gbitab;
-    __m128i          gbitab_lo,gbitab_hi;
-    __m256           vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp;
-    __m256           minushalf = _mm256_set1_ps(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    __m256           rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    __m256           one_sixth   = _mm256_set1_ps(1.0/6.0);
-    __m256           one_twelfth = _mm256_set1_ps(1.0/12.0);
-    __m256i          vfitab;
-    __m128i          vfitab_lo,vfitab_hi;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m256           rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m256           dummy_mask,cutoff_mask;
-    __m256           signbit = _mm256_castsi256_ps( _mm256_set1_epi32(0x80000000) );
-    __m256           one     = _mm256_set1_ps(1.0);
-    __m256           two     = _mm256_set1_ps(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm256_set1_ps(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = _mm256_set1_ps(kernel_data->table_vdw->scale);
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm256_set1_ps(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm256_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = jnrC = jnrD = jnrE = jnrF = jnrG = jnrH = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-    j_coord_offsetC = 0;
-    j_coord_offsetD = 0;
-    j_coord_offsetE = 0;
-    j_coord_offsetF = 0;
-    j_coord_offsetG = 0;
-    j_coord_offsetH = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    for(iidx=0;iidx<4*DIM;iidx++)
-    {
-        scratch[iidx] = 0.0;
-    }
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm256_load_shift_and_1rvec_broadcast_ps(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm256_setzero_ps();
-        fiy0             = _mm256_setzero_ps();
-        fiz0             = _mm256_setzero_ps();
-
-        /* Load parameters for i particles */
-        iq0              = _mm256_mul_ps(facel,_mm256_set1_ps(charge[inr+0]));
-        isai0            = _mm256_set1_ps(invsqrta[inr+0]);
-        vdwioffsetptr0   = vdwparam+2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _mm256_setzero_ps();
-        vgbsum           = _mm256_setzero_ps();
-        vvdwsum          = _mm256_setzero_ps();
-        dvdasum          = _mm256_setzero_ps();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end && jjnr[jidx+7]>=0; jidx+=8)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            jnrC             = jjnr[jidx+2];
-            jnrD             = jjnr[jidx+3];
-            jnrE             = jjnr[jidx+4];
-            jnrF             = jjnr[jidx+5];
-            jnrG             = jjnr[jidx+6];
-            jnrH             = jjnr[jidx+7];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-            j_coord_offsetE  = DIM*jnrE;
-            j_coord_offsetF  = DIM*jnrF;
-            j_coord_offsetG  = DIM*jnrG;
-            j_coord_offsetH  = DIM*jnrH;
-
-            /* load j atom coordinates */
-            gmx_mm256_load_1rvec_8ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                                 x+j_coord_offsetC,x+j_coord_offsetD,
-                                                 x+j_coord_offsetE,x+j_coord_offsetF,
-                                                 x+j_coord_offsetG,x+j_coord_offsetH,
-                                                 &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm256_sub_ps(ix0,jx0);
-            dy00             = _mm256_sub_ps(iy0,jy0);
-            dz00             = _mm256_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm256_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = avx256_invsqrt_f(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm256_load_8real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                                 charge+jnrC+0,charge+jnrD+0,
-                                                                 charge+jnrE+0,charge+jnrF+0,
-                                                                 charge+jnrG+0,charge+jnrH+0);
-            isaj0            = gmx_mm256_load_8real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                                 invsqrta+jnrC+0,invsqrta+jnrD+0,
-                                                                 invsqrta+jnrE+0,invsqrta+jnrF+0,
-                                                                 invsqrta+jnrG+0,invsqrta+jnrH+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-            vdwjidx0E        = 2*vdwtype[jnrE+0];
-            vdwjidx0F        = 2*vdwtype[jnrF+0];
-            vdwjidx0G        = 2*vdwtype[jnrG+0];
-            vdwjidx0H        = 2*vdwtype[jnrH+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm256_mul_ps(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm256_mul_ps(iq0,jq0);
-            gmx_mm256_load_8pair_swizzle_ps(vdwioffsetptr0+vdwjidx0A,
-                                            vdwioffsetptr0+vdwjidx0B,
-                                            vdwioffsetptr0+vdwjidx0C,
-                                            vdwioffsetptr0+vdwjidx0D,
-                                            vdwioffsetptr0+vdwjidx0E,
-                                            vdwioffsetptr0+vdwjidx0F,
-                                            vdwioffsetptr0+vdwjidx0G,
-                                            vdwioffsetptr0+vdwjidx0H,
-                                            &c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _mm256_mul_ps(r00,vftabscale);
-            vfitab           = _mm256_cvttps_epi32(rt);
-            vfeps            = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR));
-            /*         AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... */
-            vfitab_lo        = _mm256_extractf128_si256(vfitab,0x0);
-            vfitab_hi        = _mm256_extractf128_si256(vfitab,0x1);
-            vfitab_lo        = _mm_slli_epi32(vfitab_lo,3);
-            vfitab_hi        = _mm_slli_epi32(vfitab_hi,3);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm256_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm256_xor_ps(signbit,_mm256_mul_ps(qq00,_mm256_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm256_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm256_mul_ps(r00,gbscale);
-            gbitab           = _mm256_cvttps_epi32(rt);
-            gbeps            = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR));
-            /*         AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... */
-            gbitab_lo        = _mm256_extractf128_si256(gbitab,0x0);
-            gbitab_hi        = _mm256_extractf128_si256(gbitab,0x1);
-            gbitab_lo        = _mm_slli_epi32(gbitab_lo,2);
-            gbitab_hi        = _mm_slli_epi32(gbitab_hi,2);
-            Y                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,0)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,0)));
-            F                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,1)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,1)));
-            G                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,2)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,2)));
-            H                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,3)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,3)));
-            GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm256_mul_ps(gbeps,H);
-            Fp               = _mm256_add_ps(F,_mm256_mul_ps(gbeps,_mm256_add_ps(G,Heps)));
-            VV               = _mm256_add_ps(Y,_mm256_mul_ps(gbeps,Fp));
-            vgb              = _mm256_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps))));
-            fgb              = _mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale));
-            dvdatmp          = _mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r00)));
-            dvdasum          = _mm256_add_ps(dvdasum,dvdatmp);
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            fjptrE           = dvda+jnrE;
-            fjptrF           = dvda+jnrF;
-            fjptrG           = dvda+jnrG;
-            fjptrH           = dvda+jnrH;
-            gmx_mm256_increment_8real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,
-                                                 _mm256_mul_ps(dvdatmp,_mm256_mul_ps(isaj0,isaj0)));
-            velec            = _mm256_mul_ps(qq00,rinv00);
-            felec            = _mm256_mul_ps(_mm256_sub_ps(_mm256_mul_ps(velec,rinv00),fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,0)),
-                                                  _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,0)));
-            F                = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,1)),
-                                                  _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,1)));
-            G                = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,2)),
-                                                  _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,2)));
-            H                = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,3)),
-                                                  _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,3)));
-            GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm256_mul_ps(vfeps,H);
-            Fp               = _mm256_add_ps(F,_mm256_mul_ps(vfeps,_mm256_add_ps(G,Heps)));
-            VV               = _mm256_add_ps(Y,_mm256_mul_ps(vfeps,Fp));
-            vvdw6            = _mm256_mul_ps(c6_00,VV);
-            FF               = _mm256_add_ps(Fp,_mm256_mul_ps(vfeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps))));
-            fvdw6            = _mm256_mul_ps(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            vfitab_lo        = _mm_add_epi32(vfitab_lo,ifour);
-            vfitab_hi        = _mm_add_epi32(vfitab_hi,ifour);
-            Y                = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,0)),
-                                                  _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,0)));
-            F                = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,1)),
-                                                  _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,1)));
-            G                = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,2)),
-                                                  _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,2)));
-            H                = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,3)),
-                                                  _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,3)));
-            GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm256_mul_ps(vfeps,H);
-            Fp               = _mm256_add_ps(F,_mm256_mul_ps(vfeps,_mm256_add_ps(G,Heps)));
-            VV               = _mm256_add_ps(Y,_mm256_mul_ps(vfeps,Fp));
-            vvdw12           = _mm256_mul_ps(c12_00,VV);
-            FF               = _mm256_add_ps(Fp,_mm256_mul_ps(vfeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps))));
-            fvdw12           = _mm256_mul_ps(c12_00,FF);
-            vvdw             = _mm256_add_ps(vvdw12,vvdw6);
-            fvdw             = _mm256_xor_ps(signbit,_mm256_mul_ps(_mm256_add_ps(fvdw6,fvdw12),_mm256_mul_ps(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _mm256_add_ps(velecsum,velec);
-            vgbsum           = _mm256_add_ps(vgbsum,vgb);
-            vvdwsum          = _mm256_add_ps(vvdwsum,vvdw);
-
-            fscal            = _mm256_add_ps(felec,fvdw);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm256_mul_ps(fscal,dx00);
-            ty               = _mm256_mul_ps(fscal,dy00);
-            tz               = _mm256_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm256_add_ps(fix0,tx);
-            fiy0             = _mm256_add_ps(fiy0,ty);
-            fiz0             = _mm256_add_ps(fiz0,tz);
-
-            fjptrA             = f+j_coord_offsetA;
-            fjptrB             = f+j_coord_offsetB;
-            fjptrC             = f+j_coord_offsetC;
-            fjptrD             = f+j_coord_offsetD;
-            fjptrE             = f+j_coord_offsetE;
-            fjptrF             = f+j_coord_offsetF;
-            fjptrG             = f+j_coord_offsetG;
-            fjptrH             = f+j_coord_offsetH;
-            gmx_mm256_decrement_1rvec_8ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,tx,ty,tz);
-
-            /* Inner loop uses 91 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrlistA         = jjnr[jidx];
-            jnrlistB         = jjnr[jidx+1];
-            jnrlistC         = jjnr[jidx+2];
-            jnrlistD         = jjnr[jidx+3];
-            jnrlistE         = jjnr[jidx+4];
-            jnrlistF         = jjnr[jidx+5];
-            jnrlistG         = jjnr[jidx+6];
-            jnrlistH         = jjnr[jidx+7];
-            /* Sign of each element will be negative for non-real atoms.
-             * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
-             * so use it as val = _mm_andnot_ps(mask,val) to clear dummy entries.
-             */
-            dummy_mask = gmx_mm256_set_m128(gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx+4)),_mm_setzero_si128())),
-                                            gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128())));
-                                            
-            jnrA       = (jnrlistA>=0) ? jnrlistA : 0;
-            jnrB       = (jnrlistB>=0) ? jnrlistB : 0;
-            jnrC       = (jnrlistC>=0) ? jnrlistC : 0;
-            jnrD       = (jnrlistD>=0) ? jnrlistD : 0;
-            jnrE       = (jnrlistE>=0) ? jnrlistE : 0;
-            jnrF       = (jnrlistF>=0) ? jnrlistF : 0;
-            jnrG       = (jnrlistG>=0) ? jnrlistG : 0;
-            jnrH       = (jnrlistH>=0) ? jnrlistH : 0;
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-            j_coord_offsetE  = DIM*jnrE;
-            j_coord_offsetF  = DIM*jnrF;
-            j_coord_offsetG  = DIM*jnrG;
-            j_coord_offsetH  = DIM*jnrH;
-
-            /* load j atom coordinates */
-            gmx_mm256_load_1rvec_8ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                                 x+j_coord_offsetC,x+j_coord_offsetD,
-                                                 x+j_coord_offsetE,x+j_coord_offsetF,
-                                                 x+j_coord_offsetG,x+j_coord_offsetH,
-                                                 &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm256_sub_ps(ix0,jx0);
-            dy00             = _mm256_sub_ps(iy0,jy0);
-            dz00             = _mm256_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm256_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = avx256_invsqrt_f(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm256_load_8real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                                 charge+jnrC+0,charge+jnrD+0,
-                                                                 charge+jnrE+0,charge+jnrF+0,
-                                                                 charge+jnrG+0,charge+jnrH+0);
-            isaj0            = gmx_mm256_load_8real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                                 invsqrta+jnrC+0,invsqrta+jnrD+0,
-                                                                 invsqrta+jnrE+0,invsqrta+jnrF+0,
-                                                                 invsqrta+jnrG+0,invsqrta+jnrH+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-            vdwjidx0E        = 2*vdwtype[jnrE+0];
-            vdwjidx0F        = 2*vdwtype[jnrF+0];
-            vdwjidx0G        = 2*vdwtype[jnrG+0];
-            vdwjidx0H        = 2*vdwtype[jnrH+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm256_mul_ps(rsq00,rinv00);
-            r00              = _mm256_andnot_ps(dummy_mask,r00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm256_mul_ps(iq0,jq0);
-            gmx_mm256_load_8pair_swizzle_ps(vdwioffsetptr0+vdwjidx0A,
-                                            vdwioffsetptr0+vdwjidx0B,
-                                            vdwioffsetptr0+vdwjidx0C,
-                                            vdwioffsetptr0+vdwjidx0D,
-                                            vdwioffsetptr0+vdwjidx0E,
-                                            vdwioffsetptr0+vdwjidx0F,
-                                            vdwioffsetptr0+vdwjidx0G,
-                                            vdwioffsetptr0+vdwjidx0H,
-                                            &c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _mm256_mul_ps(r00,vftabscale);
-            vfitab           = _mm256_cvttps_epi32(rt);
-            vfeps            = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR));
-            /*         AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... */
-            vfitab_lo        = _mm256_extractf128_si256(vfitab,0x0);
-            vfitab_hi        = _mm256_extractf128_si256(vfitab,0x1);
-            vfitab_lo        = _mm_slli_epi32(vfitab_lo,3);
-            vfitab_hi        = _mm_slli_epi32(vfitab_hi,3);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm256_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm256_xor_ps(signbit,_mm256_mul_ps(qq00,_mm256_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm256_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm256_mul_ps(r00,gbscale);
-            gbitab           = _mm256_cvttps_epi32(rt);
-            gbeps            = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR));
-            /*         AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... */
-            gbitab_lo        = _mm256_extractf128_si256(gbitab,0x0);
-            gbitab_hi        = _mm256_extractf128_si256(gbitab,0x1);
-            gbitab_lo        = _mm_slli_epi32(gbitab_lo,2);
-            gbitab_hi        = _mm_slli_epi32(gbitab_hi,2);
-            Y                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,0)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,0)));
-            F                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,1)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,1)));
-            G                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,2)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,2)));
-            H                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,3)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,3)));
-            GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm256_mul_ps(gbeps,H);
-            Fp               = _mm256_add_ps(F,_mm256_mul_ps(gbeps,_mm256_add_ps(G,Heps)));
-            VV               = _mm256_add_ps(Y,_mm256_mul_ps(gbeps,Fp));
-            vgb              = _mm256_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps))));
-            fgb              = _mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale));
-            dvdatmp          = _mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r00)));
-            dvdatmp          = _mm256_andnot_ps(dummy_mask,dvdatmp);
-            dvdasum          = _mm256_add_ps(dvdasum,dvdatmp);
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            fjptrE             = (jnrlistE>=0) ? dvda+jnrE : scratch;
-            fjptrF             = (jnrlistF>=0) ? dvda+jnrF : scratch;
-            fjptrG             = (jnrlistG>=0) ? dvda+jnrG : scratch;
-            fjptrH             = (jnrlistH>=0) ? dvda+jnrH : scratch;
-            gmx_mm256_increment_8real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,
-                                                 _mm256_mul_ps(dvdatmp,_mm256_mul_ps(isaj0,isaj0)));
-            velec            = _mm256_mul_ps(qq00,rinv00);
-            felec            = _mm256_mul_ps(_mm256_sub_ps(_mm256_mul_ps(velec,rinv00),fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,0)),
-                                                  _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,0)));
-            F                = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,1)),
-                                                  _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,1)));
-            G                = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,2)),
-                                                  _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,2)));
-            H                = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,3)),
-                                                  _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,3)));
-            GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm256_mul_ps(vfeps,H);
-            Fp               = _mm256_add_ps(F,_mm256_mul_ps(vfeps,_mm256_add_ps(G,Heps)));
-            VV               = _mm256_add_ps(Y,_mm256_mul_ps(vfeps,Fp));
-            vvdw6            = _mm256_mul_ps(c6_00,VV);
-            FF               = _mm256_add_ps(Fp,_mm256_mul_ps(vfeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps))));
-            fvdw6            = _mm256_mul_ps(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            vfitab_lo        = _mm_add_epi32(vfitab_lo,ifour);
-            vfitab_hi        = _mm_add_epi32(vfitab_hi,ifour);
-            Y                = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,0)),
-                                                  _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,0)));
-            F                = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,1)),
-                                                  _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,1)));
-            G                = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,2)),
-                                                  _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,2)));
-            H                = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,3)),
-                                                  _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,3)));
-            GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm256_mul_ps(vfeps,H);
-            Fp               = _mm256_add_ps(F,_mm256_mul_ps(vfeps,_mm256_add_ps(G,Heps)));
-            VV               = _mm256_add_ps(Y,_mm256_mul_ps(vfeps,Fp));
-            vvdw12           = _mm256_mul_ps(c12_00,VV);
-            FF               = _mm256_add_ps(Fp,_mm256_mul_ps(vfeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps))));
-            fvdw12           = _mm256_mul_ps(c12_00,FF);
-            vvdw             = _mm256_add_ps(vvdw12,vvdw6);
-            fvdw             = _mm256_xor_ps(signbit,_mm256_mul_ps(_mm256_add_ps(fvdw6,fvdw12),_mm256_mul_ps(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _mm256_andnot_ps(dummy_mask,velec);
-            velecsum         = _mm256_add_ps(velecsum,velec);
-            vgb              = _mm256_andnot_ps(dummy_mask,vgb);
-            vgbsum           = _mm256_add_ps(vgbsum,vgb);
-            vvdw             = _mm256_andnot_ps(dummy_mask,vvdw);
-            vvdwsum          = _mm256_add_ps(vvdwsum,vvdw);
-
-            fscal            = _mm256_add_ps(felec,fvdw);
-
-            fscal            = _mm256_andnot_ps(dummy_mask,fscal);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm256_mul_ps(fscal,dx00);
-            ty               = _mm256_mul_ps(fscal,dy00);
-            tz               = _mm256_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm256_add_ps(fix0,tx);
-            fiy0             = _mm256_add_ps(fiy0,ty);
-            fiz0             = _mm256_add_ps(fiz0,tz);
-
-            fjptrA             = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
-            fjptrB             = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
-            fjptrC             = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
-            fjptrD             = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
-            fjptrE             = (jnrlistE>=0) ? f+j_coord_offsetE : scratch;
-            fjptrF             = (jnrlistF>=0) ? f+j_coord_offsetF : scratch;
-            fjptrG             = (jnrlistG>=0) ? f+j_coord_offsetG : scratch;
-            fjptrH             = (jnrlistH>=0) ? f+j_coord_offsetH : scratch;
-            gmx_mm256_decrement_1rvec_8ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,tx,ty,tz);
-
-            /* Inner loop uses 92 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm256_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0,
-                                                 f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_mm256_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_mm256_update_1pot_ps(vgbsum,kernel_data->energygrp_polarization+ggid);
-        gmx_mm256_update_1pot_ps(vvdwsum,kernel_data->energygrp_vdw+ggid);
-        dvdasum = _mm256_mul_ps(dvdasum, _mm256_mul_ps(isai0,isai0));
-        gmx_mm256_update_1pot_ps(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 10 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*92);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_256_single
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_256_single
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or 
-     * just 0 for non-waters.
-     * Suffixes A,B,C,D,E,F,G,H refer to j loop unrolling done with AVX, e.g. for the eight different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB,jnrC,jnrD;
-    int              jnrE,jnrF,jnrG,jnrH;
-    int              jnrlistA,jnrlistB,jnrlistC,jnrlistD;
-    int              jnrlistE,jnrlistF,jnrlistG,jnrlistH;
-    int              j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD;
-    int              j_coord_offsetE,j_coord_offsetF,j_coord_offsetG,j_coord_offsetH;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    real             *fjptrA,*fjptrB,*fjptrC,*fjptrD,*fjptrE,*fjptrF,*fjptrG,*fjptrH;
-    real             scratch[4*DIM];
-    __m256           tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    real *           vdwioffsetptr0;
-    __m256           ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D,vdwjidx0E,vdwjidx0F,vdwjidx0G,vdwjidx0H;
-    __m256           jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m256           dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m256           velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m256i          gbitab;
-    __m128i          gbitab_lo,gbitab_hi;
-    __m256           vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp;
-    __m256           minushalf = _mm256_set1_ps(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    __m256           rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    __m256           one_sixth   = _mm256_set1_ps(1.0/6.0);
-    __m256           one_twelfth = _mm256_set1_ps(1.0/12.0);
-    __m256i          vfitab;
-    __m128i          vfitab_lo,vfitab_hi;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m256           rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m256           dummy_mask,cutoff_mask;
-    __m256           signbit = _mm256_castsi256_ps( _mm256_set1_epi32(0x80000000) );
-    __m256           one     = _mm256_set1_ps(1.0);
-    __m256           two     = _mm256_set1_ps(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm256_set1_ps(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = _mm256_set1_ps(kernel_data->table_vdw->scale);
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm256_set1_ps(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm256_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = jnrC = jnrD = jnrE = jnrF = jnrG = jnrH = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-    j_coord_offsetC = 0;
-    j_coord_offsetD = 0;
-    j_coord_offsetE = 0;
-    j_coord_offsetF = 0;
-    j_coord_offsetG = 0;
-    j_coord_offsetH = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    for(iidx=0;iidx<4*DIM;iidx++)
-    {
-        scratch[iidx] = 0.0;
-    }
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm256_load_shift_and_1rvec_broadcast_ps(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm256_setzero_ps();
-        fiy0             = _mm256_setzero_ps();
-        fiz0             = _mm256_setzero_ps();
-
-        /* Load parameters for i particles */
-        iq0              = _mm256_mul_ps(facel,_mm256_set1_ps(charge[inr+0]));
-        isai0            = _mm256_set1_ps(invsqrta[inr+0]);
-        vdwioffsetptr0   = vdwparam+2*nvdwtype*vdwtype[inr+0];
-
-        dvdasum          = _mm256_setzero_ps();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end && jjnr[jidx+7]>=0; jidx+=8)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            jnrC             = jjnr[jidx+2];
-            jnrD             = jjnr[jidx+3];
-            jnrE             = jjnr[jidx+4];
-            jnrF             = jjnr[jidx+5];
-            jnrG             = jjnr[jidx+6];
-            jnrH             = jjnr[jidx+7];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-            j_coord_offsetE  = DIM*jnrE;
-            j_coord_offsetF  = DIM*jnrF;
-            j_coord_offsetG  = DIM*jnrG;
-            j_coord_offsetH  = DIM*jnrH;
-
-            /* load j atom coordinates */
-            gmx_mm256_load_1rvec_8ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                                 x+j_coord_offsetC,x+j_coord_offsetD,
-                                                 x+j_coord_offsetE,x+j_coord_offsetF,
-                                                 x+j_coord_offsetG,x+j_coord_offsetH,
-                                                 &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm256_sub_ps(ix0,jx0);
-            dy00             = _mm256_sub_ps(iy0,jy0);
-            dz00             = _mm256_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm256_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = avx256_invsqrt_f(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm256_load_8real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                                 charge+jnrC+0,charge+jnrD+0,
-                                                                 charge+jnrE+0,charge+jnrF+0,
-                                                                 charge+jnrG+0,charge+jnrH+0);
-            isaj0            = gmx_mm256_load_8real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                                 invsqrta+jnrC+0,invsqrta+jnrD+0,
-                                                                 invsqrta+jnrE+0,invsqrta+jnrF+0,
-                                                                 invsqrta+jnrG+0,invsqrta+jnrH+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-            vdwjidx0E        = 2*vdwtype[jnrE+0];
-            vdwjidx0F        = 2*vdwtype[jnrF+0];
-            vdwjidx0G        = 2*vdwtype[jnrG+0];
-            vdwjidx0H        = 2*vdwtype[jnrH+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm256_mul_ps(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm256_mul_ps(iq0,jq0);
-            gmx_mm256_load_8pair_swizzle_ps(vdwioffsetptr0+vdwjidx0A,
-                                            vdwioffsetptr0+vdwjidx0B,
-                                            vdwioffsetptr0+vdwjidx0C,
-                                            vdwioffsetptr0+vdwjidx0D,
-                                            vdwioffsetptr0+vdwjidx0E,
-                                            vdwioffsetptr0+vdwjidx0F,
-                                            vdwioffsetptr0+vdwjidx0G,
-                                            vdwioffsetptr0+vdwjidx0H,
-                                            &c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _mm256_mul_ps(r00,vftabscale);
-            vfitab           = _mm256_cvttps_epi32(rt);
-            vfeps            = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR));
-            /*         AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... */
-            vfitab_lo        = _mm256_extractf128_si256(vfitab,0x0);
-            vfitab_hi        = _mm256_extractf128_si256(vfitab,0x1);
-            vfitab_lo        = _mm_slli_epi32(vfitab_lo,3);
-            vfitab_hi        = _mm_slli_epi32(vfitab_hi,3);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm256_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm256_xor_ps(signbit,_mm256_mul_ps(qq00,_mm256_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm256_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm256_mul_ps(r00,gbscale);
-            gbitab           = _mm256_cvttps_epi32(rt);
-            gbeps            = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR));
-            /*         AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... */
-            gbitab_lo        = _mm256_extractf128_si256(gbitab,0x0);
-            gbitab_hi        = _mm256_extractf128_si256(gbitab,0x1);
-            gbitab_lo        = _mm_slli_epi32(gbitab_lo,2);
-            gbitab_hi        = _mm_slli_epi32(gbitab_hi,2);
-            Y                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,0)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,0)));
-            F                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,1)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,1)));
-            G                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,2)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,2)));
-            H                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,3)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,3)));
-            GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm256_mul_ps(gbeps,H);
-            Fp               = _mm256_add_ps(F,_mm256_mul_ps(gbeps,_mm256_add_ps(G,Heps)));
-            VV               = _mm256_add_ps(Y,_mm256_mul_ps(gbeps,Fp));
-            vgb              = _mm256_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps))));
-            fgb              = _mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale));
-            dvdatmp          = _mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r00)));
-            dvdasum          = _mm256_add_ps(dvdasum,dvdatmp);
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            fjptrE           = dvda+jnrE;
-            fjptrF           = dvda+jnrF;
-            fjptrG           = dvda+jnrG;
-            fjptrH           = dvda+jnrH;
-            gmx_mm256_increment_8real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,
-                                                 _mm256_mul_ps(dvdatmp,_mm256_mul_ps(isaj0,isaj0)));
-            velec            = _mm256_mul_ps(qq00,rinv00);
-            felec            = _mm256_mul_ps(_mm256_sub_ps(_mm256_mul_ps(velec,rinv00),fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,0)),
-                                                  _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,0)));
-            F                = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,1)),
-                                                  _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,1)));
-            G                = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,2)),
-                                                  _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,2)));
-            H                = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,3)),
-                                                  _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,3)));
-            GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm256_mul_ps(vfeps,H);
-            Fp               = _mm256_add_ps(F,_mm256_mul_ps(vfeps,_mm256_add_ps(G,Heps)));
-            FF               = _mm256_add_ps(Fp,_mm256_mul_ps(vfeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps))));
-            fvdw6            = _mm256_mul_ps(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            vfitab_lo        = _mm_add_epi32(vfitab_lo,ifour);
-            vfitab_hi        = _mm_add_epi32(vfitab_hi,ifour);
-            Y                = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,0)),
-                                                  _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,0)));
-            F                = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,1)),
-                                                  _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,1)));
-            G                = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,2)),
-                                                  _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,2)));
-            H                = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,3)),
-                                                  _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,3)));
-            GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm256_mul_ps(vfeps,H);
-            Fp               = _mm256_add_ps(F,_mm256_mul_ps(vfeps,_mm256_add_ps(G,Heps)));
-            FF               = _mm256_add_ps(Fp,_mm256_mul_ps(vfeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps))));
-            fvdw12           = _mm256_mul_ps(c12_00,FF);
-            fvdw             = _mm256_xor_ps(signbit,_mm256_mul_ps(_mm256_add_ps(fvdw6,fvdw12),_mm256_mul_ps(vftabscale,rinv00)));
-
-            fscal            = _mm256_add_ps(felec,fvdw);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm256_mul_ps(fscal,dx00);
-            ty               = _mm256_mul_ps(fscal,dy00);
-            tz               = _mm256_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm256_add_ps(fix0,tx);
-            fiy0             = _mm256_add_ps(fiy0,ty);
-            fiz0             = _mm256_add_ps(fiz0,tz);
-
-            fjptrA             = f+j_coord_offsetA;
-            fjptrB             = f+j_coord_offsetB;
-            fjptrC             = f+j_coord_offsetC;
-            fjptrD             = f+j_coord_offsetD;
-            fjptrE             = f+j_coord_offsetE;
-            fjptrF             = f+j_coord_offsetF;
-            fjptrG             = f+j_coord_offsetG;
-            fjptrH             = f+j_coord_offsetH;
-            gmx_mm256_decrement_1rvec_8ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,tx,ty,tz);
-
-            /* Inner loop uses 81 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrlistA         = jjnr[jidx];
-            jnrlistB         = jjnr[jidx+1];
-            jnrlistC         = jjnr[jidx+2];
-            jnrlistD         = jjnr[jidx+3];
-            jnrlistE         = jjnr[jidx+4];
-            jnrlistF         = jjnr[jidx+5];
-            jnrlistG         = jjnr[jidx+6];
-            jnrlistH         = jjnr[jidx+7];
-            /* Sign of each element will be negative for non-real atoms.
-             * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
-             * so use it as val = _mm_andnot_ps(mask,val) to clear dummy entries.
-             */
-            dummy_mask = gmx_mm256_set_m128(gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx+4)),_mm_setzero_si128())),
-                                            gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128())));
-                                            
-            jnrA       = (jnrlistA>=0) ? jnrlistA : 0;
-            jnrB       = (jnrlistB>=0) ? jnrlistB : 0;
-            jnrC       = (jnrlistC>=0) ? jnrlistC : 0;
-            jnrD       = (jnrlistD>=0) ? jnrlistD : 0;
-            jnrE       = (jnrlistE>=0) ? jnrlistE : 0;
-            jnrF       = (jnrlistF>=0) ? jnrlistF : 0;
-            jnrG       = (jnrlistG>=0) ? jnrlistG : 0;
-            jnrH       = (jnrlistH>=0) ? jnrlistH : 0;
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-            j_coord_offsetE  = DIM*jnrE;
-            j_coord_offsetF  = DIM*jnrF;
-            j_coord_offsetG  = DIM*jnrG;
-            j_coord_offsetH  = DIM*jnrH;
-
-            /* load j atom coordinates */
-            gmx_mm256_load_1rvec_8ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                                 x+j_coord_offsetC,x+j_coord_offsetD,
-                                                 x+j_coord_offsetE,x+j_coord_offsetF,
-                                                 x+j_coord_offsetG,x+j_coord_offsetH,
-                                                 &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm256_sub_ps(ix0,jx0);
-            dy00             = _mm256_sub_ps(iy0,jy0);
-            dz00             = _mm256_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm256_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = avx256_invsqrt_f(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm256_load_8real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                                 charge+jnrC+0,charge+jnrD+0,
-                                                                 charge+jnrE+0,charge+jnrF+0,
-                                                                 charge+jnrG+0,charge+jnrH+0);
-            isaj0            = gmx_mm256_load_8real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                                 invsqrta+jnrC+0,invsqrta+jnrD+0,
-                                                                 invsqrta+jnrE+0,invsqrta+jnrF+0,
-                                                                 invsqrta+jnrG+0,invsqrta+jnrH+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-            vdwjidx0E        = 2*vdwtype[jnrE+0];
-            vdwjidx0F        = 2*vdwtype[jnrF+0];
-            vdwjidx0G        = 2*vdwtype[jnrG+0];
-            vdwjidx0H        = 2*vdwtype[jnrH+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm256_mul_ps(rsq00,rinv00);
-            r00              = _mm256_andnot_ps(dummy_mask,r00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm256_mul_ps(iq0,jq0);
-            gmx_mm256_load_8pair_swizzle_ps(vdwioffsetptr0+vdwjidx0A,
-                                            vdwioffsetptr0+vdwjidx0B,
-                                            vdwioffsetptr0+vdwjidx0C,
-                                            vdwioffsetptr0+vdwjidx0D,
-                                            vdwioffsetptr0+vdwjidx0E,
-                                            vdwioffsetptr0+vdwjidx0F,
-                                            vdwioffsetptr0+vdwjidx0G,
-                                            vdwioffsetptr0+vdwjidx0H,
-                                            &c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _mm256_mul_ps(r00,vftabscale);
-            vfitab           = _mm256_cvttps_epi32(rt);
-            vfeps            = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR));
-            /*         AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... */
-            vfitab_lo        = _mm256_extractf128_si256(vfitab,0x0);
-            vfitab_hi        = _mm256_extractf128_si256(vfitab,0x1);
-            vfitab_lo        = _mm_slli_epi32(vfitab_lo,3);
-            vfitab_hi        = _mm_slli_epi32(vfitab_hi,3);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm256_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm256_xor_ps(signbit,_mm256_mul_ps(qq00,_mm256_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm256_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm256_mul_ps(r00,gbscale);
-            gbitab           = _mm256_cvttps_epi32(rt);
-            gbeps            = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR));
-            /*         AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... */
-            gbitab_lo        = _mm256_extractf128_si256(gbitab,0x0);
-            gbitab_hi        = _mm256_extractf128_si256(gbitab,0x1);
-            gbitab_lo        = _mm_slli_epi32(gbitab_lo,2);
-            gbitab_hi        = _mm_slli_epi32(gbitab_hi,2);
-            Y                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,0)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,0)));
-            F                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,1)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,1)));
-            G                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,2)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,2)));
-            H                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,3)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,3)));
-            GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm256_mul_ps(gbeps,H);
-            Fp               = _mm256_add_ps(F,_mm256_mul_ps(gbeps,_mm256_add_ps(G,Heps)));
-            VV               = _mm256_add_ps(Y,_mm256_mul_ps(gbeps,Fp));
-            vgb              = _mm256_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps))));
-            fgb              = _mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale));
-            dvdatmp          = _mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r00)));
-            dvdatmp          = _mm256_andnot_ps(dummy_mask,dvdatmp);
-            dvdasum          = _mm256_add_ps(dvdasum,dvdatmp);
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            fjptrE             = (jnrlistE>=0) ? dvda+jnrE : scratch;
-            fjptrF             = (jnrlistF>=0) ? dvda+jnrF : scratch;
-            fjptrG             = (jnrlistG>=0) ? dvda+jnrG : scratch;
-            fjptrH             = (jnrlistH>=0) ? dvda+jnrH : scratch;
-            gmx_mm256_increment_8real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,
-                                                 _mm256_mul_ps(dvdatmp,_mm256_mul_ps(isaj0,isaj0)));
-            velec            = _mm256_mul_ps(qq00,rinv00);
-            felec            = _mm256_mul_ps(_mm256_sub_ps(_mm256_mul_ps(velec,rinv00),fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,0)),
-                                                  _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,0)));
-            F                = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,1)),
-                                                  _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,1)));
-            G                = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,2)),
-                                                  _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,2)));
-            H                = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,3)),
-                                                  _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,3)));
-            GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm256_mul_ps(vfeps,H);
-            Fp               = _mm256_add_ps(F,_mm256_mul_ps(vfeps,_mm256_add_ps(G,Heps)));
-            FF               = _mm256_add_ps(Fp,_mm256_mul_ps(vfeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps))));
-            fvdw6            = _mm256_mul_ps(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            vfitab_lo        = _mm_add_epi32(vfitab_lo,ifour);
-            vfitab_hi        = _mm_add_epi32(vfitab_hi,ifour);
-            Y                = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,0)),
-                                                  _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,0)));
-            F                = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,1)),
-                                                  _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,1)));
-            G                = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,2)),
-                                                  _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,2)));
-            H                = gmx_mm256_set_m128(_mm_load_ps(vftab + _mm_extract_epi32(vfitab_hi,3)),
-                                                  _mm_load_ps(vftab + _mm_extract_epi32(vfitab_lo,3)));
-            GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm256_mul_ps(vfeps,H);
-            Fp               = _mm256_add_ps(F,_mm256_mul_ps(vfeps,_mm256_add_ps(G,Heps)));
-            FF               = _mm256_add_ps(Fp,_mm256_mul_ps(vfeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps))));
-            fvdw12           = _mm256_mul_ps(c12_00,FF);
-            fvdw             = _mm256_xor_ps(signbit,_mm256_mul_ps(_mm256_add_ps(fvdw6,fvdw12),_mm256_mul_ps(vftabscale,rinv00)));
-
-            fscal            = _mm256_add_ps(felec,fvdw);
-
-            fscal            = _mm256_andnot_ps(dummy_mask,fscal);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm256_mul_ps(fscal,dx00);
-            ty               = _mm256_mul_ps(fscal,dy00);
-            tz               = _mm256_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm256_add_ps(fix0,tx);
-            fiy0             = _mm256_add_ps(fiy0,ty);
-            fiz0             = _mm256_add_ps(fiz0,tz);
-
-            fjptrA             = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
-            fjptrB             = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
-            fjptrC             = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
-            fjptrD             = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
-            fjptrE             = (jnrlistE>=0) ? f+j_coord_offsetE : scratch;
-            fjptrF             = (jnrlistF>=0) ? f+j_coord_offsetF : scratch;
-            fjptrG             = (jnrlistG>=0) ? f+j_coord_offsetG : scratch;
-            fjptrH             = (jnrlistH>=0) ? f+j_coord_offsetH : scratch;
-            gmx_mm256_decrement_1rvec_8ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,tx,ty,tz);
-
-            /* Inner loop uses 82 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm256_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0,
-                                                 f+i_coord_offset,fshift+i_shift_offset);
-
-        dvdasum = _mm256_mul_ps(dvdasum, _mm256_mul_ps(isai0,isai0));
-        gmx_mm256_update_1pot_ps(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*82);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_256_single.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_256_single.c
deleted file mode 100644
index 6dfbecb6a4..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_256_single.c
+++ /dev/null
@@ -1,1038 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS avx_256_single kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_x86_avx_256_single.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_256_single
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_256_single
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or 
-     * just 0 for non-waters.
-     * Suffixes A,B,C,D,E,F,G,H refer to j loop unrolling done with AVX, e.g. for the eight different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB,jnrC,jnrD;
-    int              jnrE,jnrF,jnrG,jnrH;
-    int              jnrlistA,jnrlistB,jnrlistC,jnrlistD;
-    int              jnrlistE,jnrlistF,jnrlistG,jnrlistH;
-    int              j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD;
-    int              j_coord_offsetE,j_coord_offsetF,j_coord_offsetG,j_coord_offsetH;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    real             *fjptrA,*fjptrB,*fjptrC,*fjptrD,*fjptrE,*fjptrF,*fjptrG,*fjptrH;
-    real             scratch[4*DIM];
-    __m256           tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    real *           vdwioffsetptr0;
-    __m256           ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D,vdwjidx0E,vdwjidx0F,vdwjidx0G,vdwjidx0H;
-    __m256           jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m256           dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m256           velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m256i          gbitab;
-    __m128i          gbitab_lo,gbitab_hi;
-    __m256           vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp;
-    __m256           minushalf = _mm256_set1_ps(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    __m256           rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    __m256           one_sixth   = _mm256_set1_ps(1.0/6.0);
-    __m256           one_twelfth = _mm256_set1_ps(1.0/12.0);
-    __m256i          vfitab;
-    __m128i          vfitab_lo,vfitab_hi;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m256           rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m256           dummy_mask,cutoff_mask;
-    __m256           signbit = _mm256_castsi256_ps( _mm256_set1_epi32(0x80000000) );
-    __m256           one     = _mm256_set1_ps(1.0);
-    __m256           two     = _mm256_set1_ps(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm256_set1_ps(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm256_set1_ps(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm256_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = jnrC = jnrD = jnrE = jnrF = jnrG = jnrH = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-    j_coord_offsetC = 0;
-    j_coord_offsetD = 0;
-    j_coord_offsetE = 0;
-    j_coord_offsetF = 0;
-    j_coord_offsetG = 0;
-    j_coord_offsetH = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    for(iidx=0;iidx<4*DIM;iidx++)
-    {
-        scratch[iidx] = 0.0;
-    }
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm256_load_shift_and_1rvec_broadcast_ps(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm256_setzero_ps();
-        fiy0             = _mm256_setzero_ps();
-        fiz0             = _mm256_setzero_ps();
-
-        /* Load parameters for i particles */
-        iq0              = _mm256_mul_ps(facel,_mm256_set1_ps(charge[inr+0]));
-        isai0            = _mm256_set1_ps(invsqrta[inr+0]);
-        vdwioffsetptr0   = vdwparam+2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _mm256_setzero_ps();
-        vgbsum           = _mm256_setzero_ps();
-        vvdwsum          = _mm256_setzero_ps();
-        dvdasum          = _mm256_setzero_ps();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end && jjnr[jidx+7]>=0; jidx+=8)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            jnrC             = jjnr[jidx+2];
-            jnrD             = jjnr[jidx+3];
-            jnrE             = jjnr[jidx+4];
-            jnrF             = jjnr[jidx+5];
-            jnrG             = jjnr[jidx+6];
-            jnrH             = jjnr[jidx+7];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-            j_coord_offsetE  = DIM*jnrE;
-            j_coord_offsetF  = DIM*jnrF;
-            j_coord_offsetG  = DIM*jnrG;
-            j_coord_offsetH  = DIM*jnrH;
-
-            /* load j atom coordinates */
-            gmx_mm256_load_1rvec_8ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                                 x+j_coord_offsetC,x+j_coord_offsetD,
-                                                 x+j_coord_offsetE,x+j_coord_offsetF,
-                                                 x+j_coord_offsetG,x+j_coord_offsetH,
-                                                 &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm256_sub_ps(ix0,jx0);
-            dy00             = _mm256_sub_ps(iy0,jy0);
-            dz00             = _mm256_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm256_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = avx256_invsqrt_f(rsq00);
-
-            rinvsq00         = _mm256_mul_ps(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm256_load_8real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                                 charge+jnrC+0,charge+jnrD+0,
-                                                                 charge+jnrE+0,charge+jnrF+0,
-                                                                 charge+jnrG+0,charge+jnrH+0);
-            isaj0            = gmx_mm256_load_8real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                                 invsqrta+jnrC+0,invsqrta+jnrD+0,
-                                                                 invsqrta+jnrE+0,invsqrta+jnrF+0,
-                                                                 invsqrta+jnrG+0,invsqrta+jnrH+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-            vdwjidx0E        = 2*vdwtype[jnrE+0];
-            vdwjidx0F        = 2*vdwtype[jnrF+0];
-            vdwjidx0G        = 2*vdwtype[jnrG+0];
-            vdwjidx0H        = 2*vdwtype[jnrH+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm256_mul_ps(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm256_mul_ps(iq0,jq0);
-            gmx_mm256_load_8pair_swizzle_ps(vdwioffsetptr0+vdwjidx0A,
-                                            vdwioffsetptr0+vdwjidx0B,
-                                            vdwioffsetptr0+vdwjidx0C,
-                                            vdwioffsetptr0+vdwjidx0D,
-                                            vdwioffsetptr0+vdwjidx0E,
-                                            vdwioffsetptr0+vdwjidx0F,
-                                            vdwioffsetptr0+vdwjidx0G,
-                                            vdwioffsetptr0+vdwjidx0H,
-                                            &c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm256_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm256_xor_ps(signbit,_mm256_mul_ps(qq00,_mm256_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm256_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm256_mul_ps(r00,gbscale);
-            gbitab           = _mm256_cvttps_epi32(rt);
-            gbeps            = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR));
-            /*         AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... */
-            gbitab_lo        = _mm256_extractf128_si256(gbitab,0x0);
-            gbitab_hi        = _mm256_extractf128_si256(gbitab,0x1);
-            gbitab_lo        = _mm_slli_epi32(gbitab_lo,2);
-            gbitab_hi        = _mm_slli_epi32(gbitab_hi,2);
-            Y                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,0)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,0)));
-            F                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,1)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,1)));
-            G                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,2)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,2)));
-            H                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,3)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,3)));
-            GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm256_mul_ps(gbeps,H);
-            Fp               = _mm256_add_ps(F,_mm256_mul_ps(gbeps,_mm256_add_ps(G,Heps)));
-            VV               = _mm256_add_ps(Y,_mm256_mul_ps(gbeps,Fp));
-            vgb              = _mm256_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps))));
-            fgb              = _mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale));
-            dvdatmp          = _mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r00)));
-            dvdasum          = _mm256_add_ps(dvdasum,dvdatmp);
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            fjptrE           = dvda+jnrE;
-            fjptrF           = dvda+jnrF;
-            fjptrG           = dvda+jnrG;
-            fjptrH           = dvda+jnrH;
-            gmx_mm256_increment_8real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,
-                                                 _mm256_mul_ps(dvdatmp,_mm256_mul_ps(isaj0,isaj0)));
-            velec            = _mm256_mul_ps(qq00,rinv00);
-            felec            = _mm256_mul_ps(_mm256_sub_ps(_mm256_mul_ps(velec,rinv00),fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _mm256_mul_ps(_mm256_mul_ps(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _mm256_mul_ps(c6_00,rinvsix);
-            vvdw12           = _mm256_mul_ps(c12_00,_mm256_mul_ps(rinvsix,rinvsix));
-            vvdw             = _mm256_sub_ps( _mm256_mul_ps(vvdw12,one_twelfth) , _mm256_mul_ps(vvdw6,one_sixth) );
-            fvdw             = _mm256_mul_ps(_mm256_sub_ps(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _mm256_add_ps(velecsum,velec);
-            vgbsum           = _mm256_add_ps(vgbsum,vgb);
-            vvdwsum          = _mm256_add_ps(vvdwsum,vvdw);
-
-            fscal            = _mm256_add_ps(felec,fvdw);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm256_mul_ps(fscal,dx00);
-            ty               = _mm256_mul_ps(fscal,dy00);
-            tz               = _mm256_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm256_add_ps(fix0,tx);
-            fiy0             = _mm256_add_ps(fiy0,ty);
-            fiz0             = _mm256_add_ps(fiz0,tz);
-
-            fjptrA             = f+j_coord_offsetA;
-            fjptrB             = f+j_coord_offsetB;
-            fjptrC             = f+j_coord_offsetC;
-            fjptrD             = f+j_coord_offsetD;
-            fjptrE             = f+j_coord_offsetE;
-            fjptrF             = f+j_coord_offsetF;
-            fjptrG             = f+j_coord_offsetG;
-            fjptrH             = f+j_coord_offsetH;
-            gmx_mm256_decrement_1rvec_8ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,tx,ty,tz);
-
-            /* Inner loop uses 70 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrlistA         = jjnr[jidx];
-            jnrlistB         = jjnr[jidx+1];
-            jnrlistC         = jjnr[jidx+2];
-            jnrlistD         = jjnr[jidx+3];
-            jnrlistE         = jjnr[jidx+4];
-            jnrlistF         = jjnr[jidx+5];
-            jnrlistG         = jjnr[jidx+6];
-            jnrlistH         = jjnr[jidx+7];
-            /* Sign of each element will be negative for non-real atoms.
-             * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
-             * so use it as val = _mm_andnot_ps(mask,val) to clear dummy entries.
-             */
-            dummy_mask = gmx_mm256_set_m128(gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx+4)),_mm_setzero_si128())),
-                                            gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128())));
-                                            
-            jnrA       = (jnrlistA>=0) ? jnrlistA : 0;
-            jnrB       = (jnrlistB>=0) ? jnrlistB : 0;
-            jnrC       = (jnrlistC>=0) ? jnrlistC : 0;
-            jnrD       = (jnrlistD>=0) ? jnrlistD : 0;
-            jnrE       = (jnrlistE>=0) ? jnrlistE : 0;
-            jnrF       = (jnrlistF>=0) ? jnrlistF : 0;
-            jnrG       = (jnrlistG>=0) ? jnrlistG : 0;
-            jnrH       = (jnrlistH>=0) ? jnrlistH : 0;
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-            j_coord_offsetE  = DIM*jnrE;
-            j_coord_offsetF  = DIM*jnrF;
-            j_coord_offsetG  = DIM*jnrG;
-            j_coord_offsetH  = DIM*jnrH;
-
-            /* load j atom coordinates */
-            gmx_mm256_load_1rvec_8ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                                 x+j_coord_offsetC,x+j_coord_offsetD,
-                                                 x+j_coord_offsetE,x+j_coord_offsetF,
-                                                 x+j_coord_offsetG,x+j_coord_offsetH,
-                                                 &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm256_sub_ps(ix0,jx0);
-            dy00             = _mm256_sub_ps(iy0,jy0);
-            dz00             = _mm256_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm256_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = avx256_invsqrt_f(rsq00);
-
-            rinvsq00         = _mm256_mul_ps(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm256_load_8real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                                 charge+jnrC+0,charge+jnrD+0,
-                                                                 charge+jnrE+0,charge+jnrF+0,
-                                                                 charge+jnrG+0,charge+jnrH+0);
-            isaj0            = gmx_mm256_load_8real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                                 invsqrta+jnrC+0,invsqrta+jnrD+0,
-                                                                 invsqrta+jnrE+0,invsqrta+jnrF+0,
-                                                                 invsqrta+jnrG+0,invsqrta+jnrH+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-            vdwjidx0E        = 2*vdwtype[jnrE+0];
-            vdwjidx0F        = 2*vdwtype[jnrF+0];
-            vdwjidx0G        = 2*vdwtype[jnrG+0];
-            vdwjidx0H        = 2*vdwtype[jnrH+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm256_mul_ps(rsq00,rinv00);
-            r00              = _mm256_andnot_ps(dummy_mask,r00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm256_mul_ps(iq0,jq0);
-            gmx_mm256_load_8pair_swizzle_ps(vdwioffsetptr0+vdwjidx0A,
-                                            vdwioffsetptr0+vdwjidx0B,
-                                            vdwioffsetptr0+vdwjidx0C,
-                                            vdwioffsetptr0+vdwjidx0D,
-                                            vdwioffsetptr0+vdwjidx0E,
-                                            vdwioffsetptr0+vdwjidx0F,
-                                            vdwioffsetptr0+vdwjidx0G,
-                                            vdwioffsetptr0+vdwjidx0H,
-                                            &c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm256_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm256_xor_ps(signbit,_mm256_mul_ps(qq00,_mm256_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm256_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm256_mul_ps(r00,gbscale);
-            gbitab           = _mm256_cvttps_epi32(rt);
-            gbeps            = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR));
-            /*         AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... */
-            gbitab_lo        = _mm256_extractf128_si256(gbitab,0x0);
-            gbitab_hi        = _mm256_extractf128_si256(gbitab,0x1);
-            gbitab_lo        = _mm_slli_epi32(gbitab_lo,2);
-            gbitab_hi        = _mm_slli_epi32(gbitab_hi,2);
-            Y                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,0)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,0)));
-            F                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,1)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,1)));
-            G                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,2)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,2)));
-            H                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,3)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,3)));
-            GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm256_mul_ps(gbeps,H);
-            Fp               = _mm256_add_ps(F,_mm256_mul_ps(gbeps,_mm256_add_ps(G,Heps)));
-            VV               = _mm256_add_ps(Y,_mm256_mul_ps(gbeps,Fp));
-            vgb              = _mm256_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps))));
-            fgb              = _mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale));
-            dvdatmp          = _mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r00)));
-            dvdatmp          = _mm256_andnot_ps(dummy_mask,dvdatmp);
-            dvdasum          = _mm256_add_ps(dvdasum,dvdatmp);
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            fjptrE             = (jnrlistE>=0) ? dvda+jnrE : scratch;
-            fjptrF             = (jnrlistF>=0) ? dvda+jnrF : scratch;
-            fjptrG             = (jnrlistG>=0) ? dvda+jnrG : scratch;
-            fjptrH             = (jnrlistH>=0) ? dvda+jnrH : scratch;
-            gmx_mm256_increment_8real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,
-                                                 _mm256_mul_ps(dvdatmp,_mm256_mul_ps(isaj0,isaj0)));
-            velec            = _mm256_mul_ps(qq00,rinv00);
-            felec            = _mm256_mul_ps(_mm256_sub_ps(_mm256_mul_ps(velec,rinv00),fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _mm256_mul_ps(_mm256_mul_ps(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _mm256_mul_ps(c6_00,rinvsix);
-            vvdw12           = _mm256_mul_ps(c12_00,_mm256_mul_ps(rinvsix,rinvsix));
-            vvdw             = _mm256_sub_ps( _mm256_mul_ps(vvdw12,one_twelfth) , _mm256_mul_ps(vvdw6,one_sixth) );
-            fvdw             = _mm256_mul_ps(_mm256_sub_ps(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _mm256_andnot_ps(dummy_mask,velec);
-            velecsum         = _mm256_add_ps(velecsum,velec);
-            vgb              = _mm256_andnot_ps(dummy_mask,vgb);
-            vgbsum           = _mm256_add_ps(vgbsum,vgb);
-            vvdw             = _mm256_andnot_ps(dummy_mask,vvdw);
-            vvdwsum          = _mm256_add_ps(vvdwsum,vvdw);
-
-            fscal            = _mm256_add_ps(felec,fvdw);
-
-            fscal            = _mm256_andnot_ps(dummy_mask,fscal);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm256_mul_ps(fscal,dx00);
-            ty               = _mm256_mul_ps(fscal,dy00);
-            tz               = _mm256_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm256_add_ps(fix0,tx);
-            fiy0             = _mm256_add_ps(fiy0,ty);
-            fiz0             = _mm256_add_ps(fiz0,tz);
-
-            fjptrA             = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
-            fjptrB             = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
-            fjptrC             = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
-            fjptrD             = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
-            fjptrE             = (jnrlistE>=0) ? f+j_coord_offsetE : scratch;
-            fjptrF             = (jnrlistF>=0) ? f+j_coord_offsetF : scratch;
-            fjptrG             = (jnrlistG>=0) ? f+j_coord_offsetG : scratch;
-            fjptrH             = (jnrlistH>=0) ? f+j_coord_offsetH : scratch;
-            gmx_mm256_decrement_1rvec_8ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,tx,ty,tz);
-
-            /* Inner loop uses 71 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm256_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0,
-                                                 f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_mm256_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_mm256_update_1pot_ps(vgbsum,kernel_data->energygrp_polarization+ggid);
-        gmx_mm256_update_1pot_ps(vvdwsum,kernel_data->energygrp_vdw+ggid);
-        dvdasum = _mm256_mul_ps(dvdasum, _mm256_mul_ps(isai0,isai0));
-        gmx_mm256_update_1pot_ps(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 10 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*71);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_256_single
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_256_single
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or 
-     * just 0 for non-waters.
-     * Suffixes A,B,C,D,E,F,G,H refer to j loop unrolling done with AVX, e.g. for the eight different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB,jnrC,jnrD;
-    int              jnrE,jnrF,jnrG,jnrH;
-    int              jnrlistA,jnrlistB,jnrlistC,jnrlistD;
-    int              jnrlistE,jnrlistF,jnrlistG,jnrlistH;
-    int              j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD;
-    int              j_coord_offsetE,j_coord_offsetF,j_coord_offsetG,j_coord_offsetH;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    real             *fjptrA,*fjptrB,*fjptrC,*fjptrD,*fjptrE,*fjptrF,*fjptrG,*fjptrH;
-    real             scratch[4*DIM];
-    __m256           tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    real *           vdwioffsetptr0;
-    __m256           ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D,vdwjidx0E,vdwjidx0F,vdwjidx0G,vdwjidx0H;
-    __m256           jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m256           dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m256           velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m256i          gbitab;
-    __m128i          gbitab_lo,gbitab_hi;
-    __m256           vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp;
-    __m256           minushalf = _mm256_set1_ps(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    __m256           rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    __m256           one_sixth   = _mm256_set1_ps(1.0/6.0);
-    __m256           one_twelfth = _mm256_set1_ps(1.0/12.0);
-    __m256i          vfitab;
-    __m128i          vfitab_lo,vfitab_hi;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m256           rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m256           dummy_mask,cutoff_mask;
-    __m256           signbit = _mm256_castsi256_ps( _mm256_set1_epi32(0x80000000) );
-    __m256           one     = _mm256_set1_ps(1.0);
-    __m256           two     = _mm256_set1_ps(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm256_set1_ps(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm256_set1_ps(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm256_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = jnrC = jnrD = jnrE = jnrF = jnrG = jnrH = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-    j_coord_offsetC = 0;
-    j_coord_offsetD = 0;
-    j_coord_offsetE = 0;
-    j_coord_offsetF = 0;
-    j_coord_offsetG = 0;
-    j_coord_offsetH = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    for(iidx=0;iidx<4*DIM;iidx++)
-    {
-        scratch[iidx] = 0.0;
-    }
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm256_load_shift_and_1rvec_broadcast_ps(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm256_setzero_ps();
-        fiy0             = _mm256_setzero_ps();
-        fiz0             = _mm256_setzero_ps();
-
-        /* Load parameters for i particles */
-        iq0              = _mm256_mul_ps(facel,_mm256_set1_ps(charge[inr+0]));
-        isai0            = _mm256_set1_ps(invsqrta[inr+0]);
-        vdwioffsetptr0   = vdwparam+2*nvdwtype*vdwtype[inr+0];
-
-        dvdasum          = _mm256_setzero_ps();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end && jjnr[jidx+7]>=0; jidx+=8)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            jnrC             = jjnr[jidx+2];
-            jnrD             = jjnr[jidx+3];
-            jnrE             = jjnr[jidx+4];
-            jnrF             = jjnr[jidx+5];
-            jnrG             = jjnr[jidx+6];
-            jnrH             = jjnr[jidx+7];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-            j_coord_offsetE  = DIM*jnrE;
-            j_coord_offsetF  = DIM*jnrF;
-            j_coord_offsetG  = DIM*jnrG;
-            j_coord_offsetH  = DIM*jnrH;
-
-            /* load j atom coordinates */
-            gmx_mm256_load_1rvec_8ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                                 x+j_coord_offsetC,x+j_coord_offsetD,
-                                                 x+j_coord_offsetE,x+j_coord_offsetF,
-                                                 x+j_coord_offsetG,x+j_coord_offsetH,
-                                                 &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm256_sub_ps(ix0,jx0);
-            dy00             = _mm256_sub_ps(iy0,jy0);
-            dz00             = _mm256_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm256_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = avx256_invsqrt_f(rsq00);
-
-            rinvsq00         = _mm256_mul_ps(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm256_load_8real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                                 charge+jnrC+0,charge+jnrD+0,
-                                                                 charge+jnrE+0,charge+jnrF+0,
-                                                                 charge+jnrG+0,charge+jnrH+0);
-            isaj0            = gmx_mm256_load_8real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                                 invsqrta+jnrC+0,invsqrta+jnrD+0,
-                                                                 invsqrta+jnrE+0,invsqrta+jnrF+0,
-                                                                 invsqrta+jnrG+0,invsqrta+jnrH+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-            vdwjidx0E        = 2*vdwtype[jnrE+0];
-            vdwjidx0F        = 2*vdwtype[jnrF+0];
-            vdwjidx0G        = 2*vdwtype[jnrG+0];
-            vdwjidx0H        = 2*vdwtype[jnrH+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm256_mul_ps(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm256_mul_ps(iq0,jq0);
-            gmx_mm256_load_8pair_swizzle_ps(vdwioffsetptr0+vdwjidx0A,
-                                            vdwioffsetptr0+vdwjidx0B,
-                                            vdwioffsetptr0+vdwjidx0C,
-                                            vdwioffsetptr0+vdwjidx0D,
-                                            vdwioffsetptr0+vdwjidx0E,
-                                            vdwioffsetptr0+vdwjidx0F,
-                                            vdwioffsetptr0+vdwjidx0G,
-                                            vdwioffsetptr0+vdwjidx0H,
-                                            &c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm256_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm256_xor_ps(signbit,_mm256_mul_ps(qq00,_mm256_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm256_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm256_mul_ps(r00,gbscale);
-            gbitab           = _mm256_cvttps_epi32(rt);
-            gbeps            = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR));
-            /*         AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... */
-            gbitab_lo        = _mm256_extractf128_si256(gbitab,0x0);
-            gbitab_hi        = _mm256_extractf128_si256(gbitab,0x1);
-            gbitab_lo        = _mm_slli_epi32(gbitab_lo,2);
-            gbitab_hi        = _mm_slli_epi32(gbitab_hi,2);
-            Y                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,0)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,0)));
-            F                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,1)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,1)));
-            G                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,2)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,2)));
-            H                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,3)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,3)));
-            GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm256_mul_ps(gbeps,H);
-            Fp               = _mm256_add_ps(F,_mm256_mul_ps(gbeps,_mm256_add_ps(G,Heps)));
-            VV               = _mm256_add_ps(Y,_mm256_mul_ps(gbeps,Fp));
-            vgb              = _mm256_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps))));
-            fgb              = _mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale));
-            dvdatmp          = _mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r00)));
-            dvdasum          = _mm256_add_ps(dvdasum,dvdatmp);
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            fjptrE           = dvda+jnrE;
-            fjptrF           = dvda+jnrF;
-            fjptrG           = dvda+jnrG;
-            fjptrH           = dvda+jnrH;
-            gmx_mm256_increment_8real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,
-                                                 _mm256_mul_ps(dvdatmp,_mm256_mul_ps(isaj0,isaj0)));
-            velec            = _mm256_mul_ps(qq00,rinv00);
-            felec            = _mm256_mul_ps(_mm256_sub_ps(_mm256_mul_ps(velec,rinv00),fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _mm256_mul_ps(_mm256_mul_ps(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _mm256_mul_ps(_mm256_sub_ps(_mm256_mul_ps(c12_00,rinvsix),c6_00),_mm256_mul_ps(rinvsix,rinvsq00));
-
-            fscal            = _mm256_add_ps(felec,fvdw);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm256_mul_ps(fscal,dx00);
-            ty               = _mm256_mul_ps(fscal,dy00);
-            tz               = _mm256_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm256_add_ps(fix0,tx);
-            fiy0             = _mm256_add_ps(fiy0,ty);
-            fiz0             = _mm256_add_ps(fiz0,tz);
-
-            fjptrA             = f+j_coord_offsetA;
-            fjptrB             = f+j_coord_offsetB;
-            fjptrC             = f+j_coord_offsetC;
-            fjptrD             = f+j_coord_offsetD;
-            fjptrE             = f+j_coord_offsetE;
-            fjptrF             = f+j_coord_offsetF;
-            fjptrG             = f+j_coord_offsetG;
-            fjptrH             = f+j_coord_offsetH;
-            gmx_mm256_decrement_1rvec_8ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,tx,ty,tz);
-
-            /* Inner loop uses 63 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrlistA         = jjnr[jidx];
-            jnrlistB         = jjnr[jidx+1];
-            jnrlistC         = jjnr[jidx+2];
-            jnrlistD         = jjnr[jidx+3];
-            jnrlistE         = jjnr[jidx+4];
-            jnrlistF         = jjnr[jidx+5];
-            jnrlistG         = jjnr[jidx+6];
-            jnrlistH         = jjnr[jidx+7];
-            /* Sign of each element will be negative for non-real atoms.
-             * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
-             * so use it as val = _mm_andnot_ps(mask,val) to clear dummy entries.
-             */
-            dummy_mask = gmx_mm256_set_m128(gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx+4)),_mm_setzero_si128())),
-                                            gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128())));
-                                            
-            jnrA       = (jnrlistA>=0) ? jnrlistA : 0;
-            jnrB       = (jnrlistB>=0) ? jnrlistB : 0;
-            jnrC       = (jnrlistC>=0) ? jnrlistC : 0;
-            jnrD       = (jnrlistD>=0) ? jnrlistD : 0;
-            jnrE       = (jnrlistE>=0) ? jnrlistE : 0;
-            jnrF       = (jnrlistF>=0) ? jnrlistF : 0;
-            jnrG       = (jnrlistG>=0) ? jnrlistG : 0;
-            jnrH       = (jnrlistH>=0) ? jnrlistH : 0;
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-            j_coord_offsetE  = DIM*jnrE;
-            j_coord_offsetF  = DIM*jnrF;
-            j_coord_offsetG  = DIM*jnrG;
-            j_coord_offsetH  = DIM*jnrH;
-
-            /* load j atom coordinates */
-            gmx_mm256_load_1rvec_8ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                                 x+j_coord_offsetC,x+j_coord_offsetD,
-                                                 x+j_coord_offsetE,x+j_coord_offsetF,
-                                                 x+j_coord_offsetG,x+j_coord_offsetH,
-                                                 &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm256_sub_ps(ix0,jx0);
-            dy00             = _mm256_sub_ps(iy0,jy0);
-            dz00             = _mm256_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm256_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = avx256_invsqrt_f(rsq00);
-
-            rinvsq00         = _mm256_mul_ps(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm256_load_8real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                                 charge+jnrC+0,charge+jnrD+0,
-                                                                 charge+jnrE+0,charge+jnrF+0,
-                                                                 charge+jnrG+0,charge+jnrH+0);
-            isaj0            = gmx_mm256_load_8real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                                 invsqrta+jnrC+0,invsqrta+jnrD+0,
-                                                                 invsqrta+jnrE+0,invsqrta+jnrF+0,
-                                                                 invsqrta+jnrG+0,invsqrta+jnrH+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-            vdwjidx0E        = 2*vdwtype[jnrE+0];
-            vdwjidx0F        = 2*vdwtype[jnrF+0];
-            vdwjidx0G        = 2*vdwtype[jnrG+0];
-            vdwjidx0H        = 2*vdwtype[jnrH+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm256_mul_ps(rsq00,rinv00);
-            r00              = _mm256_andnot_ps(dummy_mask,r00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm256_mul_ps(iq0,jq0);
-            gmx_mm256_load_8pair_swizzle_ps(vdwioffsetptr0+vdwjidx0A,
-                                            vdwioffsetptr0+vdwjidx0B,
-                                            vdwioffsetptr0+vdwjidx0C,
-                                            vdwioffsetptr0+vdwjidx0D,
-                                            vdwioffsetptr0+vdwjidx0E,
-                                            vdwioffsetptr0+vdwjidx0F,
-                                            vdwioffsetptr0+vdwjidx0G,
-                                            vdwioffsetptr0+vdwjidx0H,
-                                            &c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm256_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm256_xor_ps(signbit,_mm256_mul_ps(qq00,_mm256_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm256_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm256_mul_ps(r00,gbscale);
-            gbitab           = _mm256_cvttps_epi32(rt);
-            gbeps            = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR));
-            /*         AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... */
-            gbitab_lo        = _mm256_extractf128_si256(gbitab,0x0);
-            gbitab_hi        = _mm256_extractf128_si256(gbitab,0x1);
-            gbitab_lo        = _mm_slli_epi32(gbitab_lo,2);
-            gbitab_hi        = _mm_slli_epi32(gbitab_hi,2);
-            Y                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,0)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,0)));
-            F                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,1)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,1)));
-            G                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,2)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,2)));
-            H                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,3)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,3)));
-            GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm256_mul_ps(gbeps,H);
-            Fp               = _mm256_add_ps(F,_mm256_mul_ps(gbeps,_mm256_add_ps(G,Heps)));
-            VV               = _mm256_add_ps(Y,_mm256_mul_ps(gbeps,Fp));
-            vgb              = _mm256_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps))));
-            fgb              = _mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale));
-            dvdatmp          = _mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r00)));
-            dvdatmp          = _mm256_andnot_ps(dummy_mask,dvdatmp);
-            dvdasum          = _mm256_add_ps(dvdasum,dvdatmp);
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            fjptrE             = (jnrlistE>=0) ? dvda+jnrE : scratch;
-            fjptrF             = (jnrlistF>=0) ? dvda+jnrF : scratch;
-            fjptrG             = (jnrlistG>=0) ? dvda+jnrG : scratch;
-            fjptrH             = (jnrlistH>=0) ? dvda+jnrH : scratch;
-            gmx_mm256_increment_8real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,
-                                                 _mm256_mul_ps(dvdatmp,_mm256_mul_ps(isaj0,isaj0)));
-            velec            = _mm256_mul_ps(qq00,rinv00);
-            felec            = _mm256_mul_ps(_mm256_sub_ps(_mm256_mul_ps(velec,rinv00),fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _mm256_mul_ps(_mm256_mul_ps(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _mm256_mul_ps(_mm256_sub_ps(_mm256_mul_ps(c12_00,rinvsix),c6_00),_mm256_mul_ps(rinvsix,rinvsq00));
-
-            fscal            = _mm256_add_ps(felec,fvdw);
-
-            fscal            = _mm256_andnot_ps(dummy_mask,fscal);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm256_mul_ps(fscal,dx00);
-            ty               = _mm256_mul_ps(fscal,dy00);
-            tz               = _mm256_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm256_add_ps(fix0,tx);
-            fiy0             = _mm256_add_ps(fiy0,ty);
-            fiz0             = _mm256_add_ps(fiz0,tz);
-
-            fjptrA             = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
-            fjptrB             = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
-            fjptrC             = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
-            fjptrD             = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
-            fjptrE             = (jnrlistE>=0) ? f+j_coord_offsetE : scratch;
-            fjptrF             = (jnrlistF>=0) ? f+j_coord_offsetF : scratch;
-            fjptrG             = (jnrlistG>=0) ? f+j_coord_offsetG : scratch;
-            fjptrH             = (jnrlistH>=0) ? f+j_coord_offsetH : scratch;
-            gmx_mm256_decrement_1rvec_8ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,tx,ty,tz);
-
-            /* Inner loop uses 64 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm256_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0,
-                                                 f+i_coord_offset,fshift+i_shift_offset);
-
-        dvdasum = _mm256_mul_ps(dvdasum, _mm256_mul_ps(isai0,isai0));
-        gmx_mm256_update_1pot_ps(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*64);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_256_single.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_256_single.c
deleted file mode 100644
index 3c4db3bb86..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_256_single.c
+++ /dev/null
@@ -1,911 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS avx_256_single kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_x86_avx_256_single.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_256_single
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_256_single
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or 
-     * just 0 for non-waters.
-     * Suffixes A,B,C,D,E,F,G,H refer to j loop unrolling done with AVX, e.g. for the eight different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB,jnrC,jnrD;
-    int              jnrE,jnrF,jnrG,jnrH;
-    int              jnrlistA,jnrlistB,jnrlistC,jnrlistD;
-    int              jnrlistE,jnrlistF,jnrlistG,jnrlistH;
-    int              j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD;
-    int              j_coord_offsetE,j_coord_offsetF,j_coord_offsetG,j_coord_offsetH;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    real             *fjptrA,*fjptrB,*fjptrC,*fjptrD,*fjptrE,*fjptrF,*fjptrG,*fjptrH;
-    real             scratch[4*DIM];
-    __m256           tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    real *           vdwioffsetptr0;
-    __m256           ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D,vdwjidx0E,vdwjidx0F,vdwjidx0G,vdwjidx0H;
-    __m256           jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m256           dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m256           velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m256i          gbitab;
-    __m128i          gbitab_lo,gbitab_hi;
-    __m256           vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp;
-    __m256           minushalf = _mm256_set1_ps(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    __m256i          vfitab;
-    __m128i          vfitab_lo,vfitab_hi;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m256           rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m256           dummy_mask,cutoff_mask;
-    __m256           signbit = _mm256_castsi256_ps( _mm256_set1_epi32(0x80000000) );
-    __m256           one     = _mm256_set1_ps(1.0);
-    __m256           two     = _mm256_set1_ps(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm256_set1_ps(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm256_set1_ps(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm256_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = jnrC = jnrD = jnrE = jnrF = jnrG = jnrH = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-    j_coord_offsetC = 0;
-    j_coord_offsetD = 0;
-    j_coord_offsetE = 0;
-    j_coord_offsetF = 0;
-    j_coord_offsetG = 0;
-    j_coord_offsetH = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    for(iidx=0;iidx<4*DIM;iidx++)
-    {
-        scratch[iidx] = 0.0;
-    }
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm256_load_shift_and_1rvec_broadcast_ps(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm256_setzero_ps();
-        fiy0             = _mm256_setzero_ps();
-        fiz0             = _mm256_setzero_ps();
-
-        /* Load parameters for i particles */
-        iq0              = _mm256_mul_ps(facel,_mm256_set1_ps(charge[inr+0]));
-        isai0            = _mm256_set1_ps(invsqrta[inr+0]);
-
-        /* Reset potential sums */
-        velecsum         = _mm256_setzero_ps();
-        vgbsum           = _mm256_setzero_ps();
-        dvdasum          = _mm256_setzero_ps();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end && jjnr[jidx+7]>=0; jidx+=8)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            jnrC             = jjnr[jidx+2];
-            jnrD             = jjnr[jidx+3];
-            jnrE             = jjnr[jidx+4];
-            jnrF             = jjnr[jidx+5];
-            jnrG             = jjnr[jidx+6];
-            jnrH             = jjnr[jidx+7];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-            j_coord_offsetE  = DIM*jnrE;
-            j_coord_offsetF  = DIM*jnrF;
-            j_coord_offsetG  = DIM*jnrG;
-            j_coord_offsetH  = DIM*jnrH;
-
-            /* load j atom coordinates */
-            gmx_mm256_load_1rvec_8ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                                 x+j_coord_offsetC,x+j_coord_offsetD,
-                                                 x+j_coord_offsetE,x+j_coord_offsetF,
-                                                 x+j_coord_offsetG,x+j_coord_offsetH,
-                                                 &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm256_sub_ps(ix0,jx0);
-            dy00             = _mm256_sub_ps(iy0,jy0);
-            dz00             = _mm256_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm256_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = avx256_invsqrt_f(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm256_load_8real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                                 charge+jnrC+0,charge+jnrD+0,
-                                                                 charge+jnrE+0,charge+jnrF+0,
-                                                                 charge+jnrG+0,charge+jnrH+0);
-            isaj0            = gmx_mm256_load_8real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                                 invsqrta+jnrC+0,invsqrta+jnrD+0,
-                                                                 invsqrta+jnrE+0,invsqrta+jnrF+0,
-                                                                 invsqrta+jnrG+0,invsqrta+jnrH+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm256_mul_ps(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm256_mul_ps(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm256_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm256_xor_ps(signbit,_mm256_mul_ps(qq00,_mm256_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm256_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm256_mul_ps(r00,gbscale);
-            gbitab           = _mm256_cvttps_epi32(rt);
-            gbeps            = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR));
-            /*         AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... */
-            gbitab_lo        = _mm256_extractf128_si256(gbitab,0x0);
-            gbitab_hi        = _mm256_extractf128_si256(gbitab,0x1);
-            gbitab_lo        = _mm_slli_epi32(gbitab_lo,2);
-            gbitab_hi        = _mm_slli_epi32(gbitab_hi,2);
-            Y                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,0)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,0)));
-            F                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,1)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,1)));
-            G                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,2)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,2)));
-            H                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,3)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,3)));
-            GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm256_mul_ps(gbeps,H);
-            Fp               = _mm256_add_ps(F,_mm256_mul_ps(gbeps,_mm256_add_ps(G,Heps)));
-            VV               = _mm256_add_ps(Y,_mm256_mul_ps(gbeps,Fp));
-            vgb              = _mm256_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps))));
-            fgb              = _mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale));
-            dvdatmp          = _mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r00)));
-            dvdasum          = _mm256_add_ps(dvdasum,dvdatmp);
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            fjptrE           = dvda+jnrE;
-            fjptrF           = dvda+jnrF;
-            fjptrG           = dvda+jnrG;
-            fjptrH           = dvda+jnrH;
-            gmx_mm256_increment_8real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,
-                                                 _mm256_mul_ps(dvdatmp,_mm256_mul_ps(isaj0,isaj0)));
-            velec            = _mm256_mul_ps(qq00,rinv00);
-            felec            = _mm256_mul_ps(_mm256_sub_ps(_mm256_mul_ps(velec,rinv00),fgb),rinv00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _mm256_add_ps(velecsum,velec);
-            vgbsum           = _mm256_add_ps(vgbsum,vgb);
-
-            fscal            = felec;
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm256_mul_ps(fscal,dx00);
-            ty               = _mm256_mul_ps(fscal,dy00);
-            tz               = _mm256_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm256_add_ps(fix0,tx);
-            fiy0             = _mm256_add_ps(fiy0,ty);
-            fiz0             = _mm256_add_ps(fiz0,tz);
-
-            fjptrA             = f+j_coord_offsetA;
-            fjptrB             = f+j_coord_offsetB;
-            fjptrC             = f+j_coord_offsetC;
-            fjptrD             = f+j_coord_offsetD;
-            fjptrE             = f+j_coord_offsetE;
-            fjptrF             = f+j_coord_offsetF;
-            fjptrG             = f+j_coord_offsetG;
-            fjptrH             = f+j_coord_offsetH;
-            gmx_mm256_decrement_1rvec_8ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,tx,ty,tz);
-
-            /* Inner loop uses 57 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrlistA         = jjnr[jidx];
-            jnrlistB         = jjnr[jidx+1];
-            jnrlistC         = jjnr[jidx+2];
-            jnrlistD         = jjnr[jidx+3];
-            jnrlistE         = jjnr[jidx+4];
-            jnrlistF         = jjnr[jidx+5];
-            jnrlistG         = jjnr[jidx+6];
-            jnrlistH         = jjnr[jidx+7];
-            /* Sign of each element will be negative for non-real atoms.
-             * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
-             * so use it as val = _mm_andnot_ps(mask,val) to clear dummy entries.
-             */
-            dummy_mask = gmx_mm256_set_m128(gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx+4)),_mm_setzero_si128())),
-                                            gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128())));
-                                            
-            jnrA       = (jnrlistA>=0) ? jnrlistA : 0;
-            jnrB       = (jnrlistB>=0) ? jnrlistB : 0;
-            jnrC       = (jnrlistC>=0) ? jnrlistC : 0;
-            jnrD       = (jnrlistD>=0) ? jnrlistD : 0;
-            jnrE       = (jnrlistE>=0) ? jnrlistE : 0;
-            jnrF       = (jnrlistF>=0) ? jnrlistF : 0;
-            jnrG       = (jnrlistG>=0) ? jnrlistG : 0;
-            jnrH       = (jnrlistH>=0) ? jnrlistH : 0;
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-            j_coord_offsetE  = DIM*jnrE;
-            j_coord_offsetF  = DIM*jnrF;
-            j_coord_offsetG  = DIM*jnrG;
-            j_coord_offsetH  = DIM*jnrH;
-
-            /* load j atom coordinates */
-            gmx_mm256_load_1rvec_8ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                                 x+j_coord_offsetC,x+j_coord_offsetD,
-                                                 x+j_coord_offsetE,x+j_coord_offsetF,
-                                                 x+j_coord_offsetG,x+j_coord_offsetH,
-                                                 &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm256_sub_ps(ix0,jx0);
-            dy00             = _mm256_sub_ps(iy0,jy0);
-            dz00             = _mm256_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm256_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = avx256_invsqrt_f(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm256_load_8real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                                 charge+jnrC+0,charge+jnrD+0,
-                                                                 charge+jnrE+0,charge+jnrF+0,
-                                                                 charge+jnrG+0,charge+jnrH+0);
-            isaj0            = gmx_mm256_load_8real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                                 invsqrta+jnrC+0,invsqrta+jnrD+0,
-                                                                 invsqrta+jnrE+0,invsqrta+jnrF+0,
-                                                                 invsqrta+jnrG+0,invsqrta+jnrH+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm256_mul_ps(rsq00,rinv00);
-            r00              = _mm256_andnot_ps(dummy_mask,r00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm256_mul_ps(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm256_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm256_xor_ps(signbit,_mm256_mul_ps(qq00,_mm256_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm256_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm256_mul_ps(r00,gbscale);
-            gbitab           = _mm256_cvttps_epi32(rt);
-            gbeps            = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR));
-            /*         AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... */
-            gbitab_lo        = _mm256_extractf128_si256(gbitab,0x0);
-            gbitab_hi        = _mm256_extractf128_si256(gbitab,0x1);
-            gbitab_lo        = _mm_slli_epi32(gbitab_lo,2);
-            gbitab_hi        = _mm_slli_epi32(gbitab_hi,2);
-            Y                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,0)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,0)));
-            F                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,1)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,1)));
-            G                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,2)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,2)));
-            H                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,3)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,3)));
-            GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm256_mul_ps(gbeps,H);
-            Fp               = _mm256_add_ps(F,_mm256_mul_ps(gbeps,_mm256_add_ps(G,Heps)));
-            VV               = _mm256_add_ps(Y,_mm256_mul_ps(gbeps,Fp));
-            vgb              = _mm256_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps))));
-            fgb              = _mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale));
-            dvdatmp          = _mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r00)));
-            dvdatmp          = _mm256_andnot_ps(dummy_mask,dvdatmp);
-            dvdasum          = _mm256_add_ps(dvdasum,dvdatmp);
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            fjptrE             = (jnrlistE>=0) ? dvda+jnrE : scratch;
-            fjptrF             = (jnrlistF>=0) ? dvda+jnrF : scratch;
-            fjptrG             = (jnrlistG>=0) ? dvda+jnrG : scratch;
-            fjptrH             = (jnrlistH>=0) ? dvda+jnrH : scratch;
-            gmx_mm256_increment_8real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,
-                                                 _mm256_mul_ps(dvdatmp,_mm256_mul_ps(isaj0,isaj0)));
-            velec            = _mm256_mul_ps(qq00,rinv00);
-            felec            = _mm256_mul_ps(_mm256_sub_ps(_mm256_mul_ps(velec,rinv00),fgb),rinv00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _mm256_andnot_ps(dummy_mask,velec);
-            velecsum         = _mm256_add_ps(velecsum,velec);
-            vgb              = _mm256_andnot_ps(dummy_mask,vgb);
-            vgbsum           = _mm256_add_ps(vgbsum,vgb);
-
-            fscal            = felec;
-
-            fscal            = _mm256_andnot_ps(dummy_mask,fscal);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm256_mul_ps(fscal,dx00);
-            ty               = _mm256_mul_ps(fscal,dy00);
-            tz               = _mm256_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm256_add_ps(fix0,tx);
-            fiy0             = _mm256_add_ps(fiy0,ty);
-            fiz0             = _mm256_add_ps(fiz0,tz);
-
-            fjptrA             = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
-            fjptrB             = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
-            fjptrC             = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
-            fjptrD             = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
-            fjptrE             = (jnrlistE>=0) ? f+j_coord_offsetE : scratch;
-            fjptrF             = (jnrlistF>=0) ? f+j_coord_offsetF : scratch;
-            fjptrG             = (jnrlistG>=0) ? f+j_coord_offsetG : scratch;
-            fjptrH             = (jnrlistH>=0) ? f+j_coord_offsetH : scratch;
-            gmx_mm256_decrement_1rvec_8ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,tx,ty,tz);
-
-            /* Inner loop uses 58 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm256_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0,
-                                                 f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_mm256_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_mm256_update_1pot_ps(vgbsum,kernel_data->energygrp_polarization+ggid);
-        dvdasum = _mm256_mul_ps(dvdasum, _mm256_mul_ps(isai0,isai0));
-        gmx_mm256_update_1pot_ps(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 9 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*9 + inneriter*58);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_256_single
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_256_single
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or 
-     * just 0 for non-waters.
-     * Suffixes A,B,C,D,E,F,G,H refer to j loop unrolling done with AVX, e.g. for the eight different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB,jnrC,jnrD;
-    int              jnrE,jnrF,jnrG,jnrH;
-    int              jnrlistA,jnrlistB,jnrlistC,jnrlistD;
-    int              jnrlistE,jnrlistF,jnrlistG,jnrlistH;
-    int              j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD;
-    int              j_coord_offsetE,j_coord_offsetF,j_coord_offsetG,j_coord_offsetH;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    real             *fjptrA,*fjptrB,*fjptrC,*fjptrD,*fjptrE,*fjptrF,*fjptrG,*fjptrH;
-    real             scratch[4*DIM];
-    __m256           tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    real *           vdwioffsetptr0;
-    __m256           ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D,vdwjidx0E,vdwjidx0F,vdwjidx0G,vdwjidx0H;
-    __m256           jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m256           dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m256           velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m256i          gbitab;
-    __m128i          gbitab_lo,gbitab_hi;
-    __m256           vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp;
-    __m256           minushalf = _mm256_set1_ps(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    __m256i          vfitab;
-    __m128i          vfitab_lo,vfitab_hi;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m256           rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m256           dummy_mask,cutoff_mask;
-    __m256           signbit = _mm256_castsi256_ps( _mm256_set1_epi32(0x80000000) );
-    __m256           one     = _mm256_set1_ps(1.0);
-    __m256           two     = _mm256_set1_ps(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm256_set1_ps(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm256_set1_ps(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm256_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = jnrC = jnrD = jnrE = jnrF = jnrG = jnrH = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-    j_coord_offsetC = 0;
-    j_coord_offsetD = 0;
-    j_coord_offsetE = 0;
-    j_coord_offsetF = 0;
-    j_coord_offsetG = 0;
-    j_coord_offsetH = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    for(iidx=0;iidx<4*DIM;iidx++)
-    {
-        scratch[iidx] = 0.0;
-    }
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm256_load_shift_and_1rvec_broadcast_ps(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm256_setzero_ps();
-        fiy0             = _mm256_setzero_ps();
-        fiz0             = _mm256_setzero_ps();
-
-        /* Load parameters for i particles */
-        iq0              = _mm256_mul_ps(facel,_mm256_set1_ps(charge[inr+0]));
-        isai0            = _mm256_set1_ps(invsqrta[inr+0]);
-
-        dvdasum          = _mm256_setzero_ps();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end && jjnr[jidx+7]>=0; jidx+=8)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            jnrC             = jjnr[jidx+2];
-            jnrD             = jjnr[jidx+3];
-            jnrE             = jjnr[jidx+4];
-            jnrF             = jjnr[jidx+5];
-            jnrG             = jjnr[jidx+6];
-            jnrH             = jjnr[jidx+7];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-            j_coord_offsetE  = DIM*jnrE;
-            j_coord_offsetF  = DIM*jnrF;
-            j_coord_offsetG  = DIM*jnrG;
-            j_coord_offsetH  = DIM*jnrH;
-
-            /* load j atom coordinates */
-            gmx_mm256_load_1rvec_8ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                                 x+j_coord_offsetC,x+j_coord_offsetD,
-                                                 x+j_coord_offsetE,x+j_coord_offsetF,
-                                                 x+j_coord_offsetG,x+j_coord_offsetH,
-                                                 &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm256_sub_ps(ix0,jx0);
-            dy00             = _mm256_sub_ps(iy0,jy0);
-            dz00             = _mm256_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm256_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = avx256_invsqrt_f(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm256_load_8real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                                 charge+jnrC+0,charge+jnrD+0,
-                                                                 charge+jnrE+0,charge+jnrF+0,
-                                                                 charge+jnrG+0,charge+jnrH+0);
-            isaj0            = gmx_mm256_load_8real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                                 invsqrta+jnrC+0,invsqrta+jnrD+0,
-                                                                 invsqrta+jnrE+0,invsqrta+jnrF+0,
-                                                                 invsqrta+jnrG+0,invsqrta+jnrH+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm256_mul_ps(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm256_mul_ps(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm256_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm256_xor_ps(signbit,_mm256_mul_ps(qq00,_mm256_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm256_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm256_mul_ps(r00,gbscale);
-            gbitab           = _mm256_cvttps_epi32(rt);
-            gbeps            = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR));
-            /*         AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... */
-            gbitab_lo        = _mm256_extractf128_si256(gbitab,0x0);
-            gbitab_hi        = _mm256_extractf128_si256(gbitab,0x1);
-            gbitab_lo        = _mm_slli_epi32(gbitab_lo,2);
-            gbitab_hi        = _mm_slli_epi32(gbitab_hi,2);
-            Y                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,0)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,0)));
-            F                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,1)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,1)));
-            G                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,2)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,2)));
-            H                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,3)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,3)));
-            GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm256_mul_ps(gbeps,H);
-            Fp               = _mm256_add_ps(F,_mm256_mul_ps(gbeps,_mm256_add_ps(G,Heps)));
-            VV               = _mm256_add_ps(Y,_mm256_mul_ps(gbeps,Fp));
-            vgb              = _mm256_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps))));
-            fgb              = _mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale));
-            dvdatmp          = _mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r00)));
-            dvdasum          = _mm256_add_ps(dvdasum,dvdatmp);
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            fjptrE           = dvda+jnrE;
-            fjptrF           = dvda+jnrF;
-            fjptrG           = dvda+jnrG;
-            fjptrH           = dvda+jnrH;
-            gmx_mm256_increment_8real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,
-                                                 _mm256_mul_ps(dvdatmp,_mm256_mul_ps(isaj0,isaj0)));
-            velec            = _mm256_mul_ps(qq00,rinv00);
-            felec            = _mm256_mul_ps(_mm256_sub_ps(_mm256_mul_ps(velec,rinv00),fgb),rinv00);
-
-            fscal            = felec;
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm256_mul_ps(fscal,dx00);
-            ty               = _mm256_mul_ps(fscal,dy00);
-            tz               = _mm256_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm256_add_ps(fix0,tx);
-            fiy0             = _mm256_add_ps(fiy0,ty);
-            fiz0             = _mm256_add_ps(fiz0,tz);
-
-            fjptrA             = f+j_coord_offsetA;
-            fjptrB             = f+j_coord_offsetB;
-            fjptrC             = f+j_coord_offsetC;
-            fjptrD             = f+j_coord_offsetD;
-            fjptrE             = f+j_coord_offsetE;
-            fjptrF             = f+j_coord_offsetF;
-            fjptrG             = f+j_coord_offsetG;
-            fjptrH             = f+j_coord_offsetH;
-            gmx_mm256_decrement_1rvec_8ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,tx,ty,tz);
-
-            /* Inner loop uses 55 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrlistA         = jjnr[jidx];
-            jnrlistB         = jjnr[jidx+1];
-            jnrlistC         = jjnr[jidx+2];
-            jnrlistD         = jjnr[jidx+3];
-            jnrlistE         = jjnr[jidx+4];
-            jnrlistF         = jjnr[jidx+5];
-            jnrlistG         = jjnr[jidx+6];
-            jnrlistH         = jjnr[jidx+7];
-            /* Sign of each element will be negative for non-real atoms.
-             * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
-             * so use it as val = _mm_andnot_ps(mask,val) to clear dummy entries.
-             */
-            dummy_mask = gmx_mm256_set_m128(gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx+4)),_mm_setzero_si128())),
-                                            gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128())));
-                                            
-            jnrA       = (jnrlistA>=0) ? jnrlistA : 0;
-            jnrB       = (jnrlistB>=0) ? jnrlistB : 0;
-            jnrC       = (jnrlistC>=0) ? jnrlistC : 0;
-            jnrD       = (jnrlistD>=0) ? jnrlistD : 0;
-            jnrE       = (jnrlistE>=0) ? jnrlistE : 0;
-            jnrF       = (jnrlistF>=0) ? jnrlistF : 0;
-            jnrG       = (jnrlistG>=0) ? jnrlistG : 0;
-            jnrH       = (jnrlistH>=0) ? jnrlistH : 0;
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-            j_coord_offsetE  = DIM*jnrE;
-            j_coord_offsetF  = DIM*jnrF;
-            j_coord_offsetG  = DIM*jnrG;
-            j_coord_offsetH  = DIM*jnrH;
-
-            /* load j atom coordinates */
-            gmx_mm256_load_1rvec_8ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                                 x+j_coord_offsetC,x+j_coord_offsetD,
-                                                 x+j_coord_offsetE,x+j_coord_offsetF,
-                                                 x+j_coord_offsetG,x+j_coord_offsetH,
-                                                 &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm256_sub_ps(ix0,jx0);
-            dy00             = _mm256_sub_ps(iy0,jy0);
-            dz00             = _mm256_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm256_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = avx256_invsqrt_f(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm256_load_8real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                                 charge+jnrC+0,charge+jnrD+0,
-                                                                 charge+jnrE+0,charge+jnrF+0,
-                                                                 charge+jnrG+0,charge+jnrH+0);
-            isaj0            = gmx_mm256_load_8real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                                 invsqrta+jnrC+0,invsqrta+jnrD+0,
-                                                                 invsqrta+jnrE+0,invsqrta+jnrF+0,
-                                                                 invsqrta+jnrG+0,invsqrta+jnrH+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm256_mul_ps(rsq00,rinv00);
-            r00              = _mm256_andnot_ps(dummy_mask,r00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm256_mul_ps(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm256_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm256_xor_ps(signbit,_mm256_mul_ps(qq00,_mm256_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm256_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm256_mul_ps(r00,gbscale);
-            gbitab           = _mm256_cvttps_epi32(rt);
-            gbeps            = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR));
-            /*         AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... */
-            gbitab_lo        = _mm256_extractf128_si256(gbitab,0x0);
-            gbitab_hi        = _mm256_extractf128_si256(gbitab,0x1);
-            gbitab_lo        = _mm_slli_epi32(gbitab_lo,2);
-            gbitab_hi        = _mm_slli_epi32(gbitab_hi,2);
-            Y                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,0)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,0)));
-            F                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,1)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,1)));
-            G                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,2)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,2)));
-            H                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,3)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,3)));
-            GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm256_mul_ps(gbeps,H);
-            Fp               = _mm256_add_ps(F,_mm256_mul_ps(gbeps,_mm256_add_ps(G,Heps)));
-            VV               = _mm256_add_ps(Y,_mm256_mul_ps(gbeps,Fp));
-            vgb              = _mm256_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps))));
-            fgb              = _mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale));
-            dvdatmp          = _mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r00)));
-            dvdatmp          = _mm256_andnot_ps(dummy_mask,dvdatmp);
-            dvdasum          = _mm256_add_ps(dvdasum,dvdatmp);
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            fjptrE             = (jnrlistE>=0) ? dvda+jnrE : scratch;
-            fjptrF             = (jnrlistF>=0) ? dvda+jnrF : scratch;
-            fjptrG             = (jnrlistG>=0) ? dvda+jnrG : scratch;
-            fjptrH             = (jnrlistH>=0) ? dvda+jnrH : scratch;
-            gmx_mm256_increment_8real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,
-                                                 _mm256_mul_ps(dvdatmp,_mm256_mul_ps(isaj0,isaj0)));
-            velec            = _mm256_mul_ps(qq00,rinv00);
-            felec            = _mm256_mul_ps(_mm256_sub_ps(_mm256_mul_ps(velec,rinv00),fgb),rinv00);
-
-            fscal            = felec;
-
-            fscal            = _mm256_andnot_ps(dummy_mask,fscal);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm256_mul_ps(fscal,dx00);
-            ty               = _mm256_mul_ps(fscal,dy00);
-            tz               = _mm256_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm256_add_ps(fix0,tx);
-            fiy0             = _mm256_add_ps(fiy0,ty);
-            fiz0             = _mm256_add_ps(fiz0,tz);
-
-            fjptrA             = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
-            fjptrB             = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
-            fjptrC             = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
-            fjptrD             = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
-            fjptrE             = (jnrlistE>=0) ? f+j_coord_offsetE : scratch;
-            fjptrF             = (jnrlistF>=0) ? f+j_coord_offsetF : scratch;
-            fjptrG             = (jnrlistG>=0) ? f+j_coord_offsetG : scratch;
-            fjptrH             = (jnrlistH>=0) ? f+j_coord_offsetH : scratch;
-            gmx_mm256_decrement_1rvec_8ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,tx,ty,tz);
-
-            /* Inner loop uses 56 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm256_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0,
-                                                 f+i_coord_offset,fshift+i_shift_offset);
-
-        dvdasum = _mm256_mul_ps(dvdasum, _mm256_mul_ps(isai0,isai0));
-        gmx_mm256_update_1pot_ps(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*7 + inneriter*56);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_avx_256_single.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_avx_256_single.c
index b4debc8f40..5a70e18520 100644
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_avx_256_single.c
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_avx_256_single.c
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -51,6 +51,36 @@ nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_avx_256_single;
 nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_256_single;
 nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_256_single;
 nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_256_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_256_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_256_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_256_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_256_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_256_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_256_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_256_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_256_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_256_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_256_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_256_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_256_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_256_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_256_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_256_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_256_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_256_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_256_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_256_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_256_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_256_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_256_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_256_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_256_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_256_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_256_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_256_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_256_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_256_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_256_single;
 nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_avx_256_single;
 nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_avx_256_single;
 nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_avx_256_single;
@@ -141,72 +171,6 @@ nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_avx_256_single;
 nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_256_single;
 nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_256_single;
 nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_256_single;
-nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_256_single;
-nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_256_single;
 nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_256_single;
 nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_256_single;
 nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_256_single;
@@ -277,6 +241,36 @@ nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_256_single;
 nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_256_single;
 nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_256_single;
 nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_256_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_256_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_256_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_256_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_256_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_256_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_256_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_256_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_256_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_256_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_256_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_256_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_256_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_256_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_256_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_256_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_256_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_256_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_256_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_256_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_256_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_256_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_256_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_256_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_256_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_256_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_256_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_256_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_256_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_256_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_256_single;
 
 
 nb_kernel_info_t
@@ -294,6 +288,36 @@ nb_kernel_info_t
     { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_256_single, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_256_single", "avx_256_single", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_256_single", "avx_256_single", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
     { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_256_single, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_256_single", "avx_256_single", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
     { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_avx_256_single", "avx_256_single", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "PotentialAndForce" },
     { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_avx_256_single, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_avx_256_single", "avx_256_single", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_avx_256_single", "avx_256_single", "Ewald", "None", "LJEwald", "None", "Water3Particle", "", "PotentialAndForce" },
@@ -384,72 +408,6 @@ nb_kernel_info_t
     { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_256_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_256_single", "avx_256_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "Force" },
     { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_256_single", "avx_256_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "PotentialAndForce" },
     { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_256_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_256_single", "avx_256_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_256_single", "avx_256_single", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_256_single, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_256_single", "avx_256_single", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_256_single", "avx_256_single", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_256_single, "nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_256_single", "avx_256_single", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_256_single", "avx_256_single", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_256_single, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_256_single", "avx_256_single", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
     { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_256_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
@@ -519,7 +477,37 @@ nb_kernel_info_t
     { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_256_single", "avx_256_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
     { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_256_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_256_single", "avx_256_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
     { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_256_single", "avx_256_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_256_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_256_single", "avx_256_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }
+    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_256_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_256_single", "avx_256_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }
 };
 
 int
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_template_avx_256_single.pre b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_template_avx_256_single.pre
index 837fce7857..d4c8e38f35 100644
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_template_avx_256_single.pre
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_template_avx_256_single.pre
@@ -2,7 +2,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -140,13 +140,6 @@ void
     __m256           velec,felec,velecsum,facel,crf,krf,krf2;
     real             *charge;
     /* #endif */
-    /* #if 'GeneralizedBorn' in KERNEL_ELEC */
-    __m256i          gbitab;
-    __m128i          gbitab_lo,gbitab_hi;
-    __m256           vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp;
-    __m256           minushalf = _mm256_set1_ps(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    /* #endif */
     /* #if KERNEL_VDW != 'None' */
     int              nvdwtype;
     __m256           rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
@@ -155,7 +148,7 @@ void
     __m256           one_sixth   = _mm256_set1_ps(1.0/6.0);
     __m256           one_twelfth = _mm256_set1_ps(1.0/12.0);
     /* #endif */
-    /* #if 'Table' in KERNEL_ELEC or 'GeneralizedBorn' in KERNEL_ELEC or 'Table' in KERNEL_VDW */
+    /* #if 'Table' in KERNEL_ELEC or 'Table' in KERNEL_VDW */
     __m256i          vfitab;
     __m128i          vfitab_lo,vfitab_hi;
     __m128i          ifour       = _mm_set1_epi32(4);
@@ -246,14 +239,6 @@ void
      /*     #endif */
     /* #endif */
 
-    /* #if KERNEL_ELEC=='GeneralizedBorn' */
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm256_set1_ps(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm256_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-    /* #endif */
-
     /* #if 'Water' in GEOMETRY_I */
     /* Setup water-specific parameters */
     inr              = nlist->iinr[0];
@@ -394,9 +379,6 @@ void
         /*     #for I in PARTICLES_ELEC_I */
         iq{I}              = _mm256_mul_ps(facel,_mm256_set1_ps(charge[inr+{I}]));
         /*         #define OUTERFLOPS OUTERFLOPS+1 */
-        /*         #if KERNEL_ELEC=='GeneralizedBorn' */
-        isai{I}            = _mm256_set1_ps(invsqrta[inr+{I}]);
-        /*         #endif */
         /*     #endfor */
         /*     #for I in PARTICLES_VDW_I */
         vdwioffsetptr{I}   = vdwparam+2*nvdwtype*vdwtype[inr+{I}];
@@ -411,16 +393,10 @@ void
         /*     #if KERNEL_ELEC != 'None' */
         velecsum         = _mm256_setzero_ps();
         /*     #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC */
-        vgbsum           = _mm256_setzero_ps();
-        /*     #endif */
         /*     #if KERNEL_VDW != 'None' */
         vvdwsum          = _mm256_setzero_ps();
         /*     #endif */
         /* #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */
-        dvdasum          = _mm256_setzero_ps();
-        /*     #endif */
 
         /* #for ROUND in ['Loop','Epilogue'] */
 
@@ -553,12 +529,6 @@ void
                                                                  charge+jnrC+{J},charge+jnrD+{J},
                                                                  charge+jnrE+{J},charge+jnrF+{J},
                                                                  charge+jnrG+{J},charge+jnrH+{J});
-            /*         #if KERNEL_ELEC=='GeneralizedBorn' */
-            isaj{J}            = gmx_mm256_load_8real_swizzle_ps(invsqrta+jnrA+{J},invsqrta+jnrB+{J},
-                                                                 invsqrta+jnrC+{J},invsqrta+jnrD+{J},
-                                                                 invsqrta+jnrE+{J},invsqrta+jnrF+{J},
-                                                                 invsqrta+jnrG+{J},invsqrta+jnrH+{J});
-            /*         #endif */
             /*     #endfor */
             /*     #for J in PARTICLES_VDW_J */
             vdwjidx{J}A        = 2*vdwtype[jnrA+{J}];
@@ -686,79 +656,6 @@ void
             /*                 #define INNERFLOPS INNERFLOPS+3 */
             /*             #endif */
 
-            /*         #elif KERNEL_ELEC=='GeneralizedBorn' */
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm256_mul_ps(isai{I},isaj{J});
-            gbqqfactor       = _mm256_xor_ps(signbit,_mm256_mul_ps(qq{I}{J},_mm256_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm256_mul_ps(isaprod,gbtabscale);
-            /*             #define INNERFLOPS INNERFLOPS+5 */
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm256_mul_ps(r{I}{J},gbscale);
-            gbitab           = _mm256_cvttps_epi32(rt);
-            gbeps            = _mm256_sub_ps(rt,_mm256_round_ps(rt, _MM_FROUND_FLOOR));
-            /*         AVX1 does not support 256-bit integer operations, so now we go to 128-bit mode... */
-            gbitab_lo        = _mm256_extractf128_si256(gbitab,0x0);
-            gbitab_hi        = _mm256_extractf128_si256(gbitab,0x1);
-            gbitab_lo        = _mm_slli_epi32(gbitab_lo,2);
-            gbitab_hi        = _mm_slli_epi32(gbitab_hi,2);
-            Y                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,0)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,0)));
-            F                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,1)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,1)));
-            G                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,2)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,2)));
-            H                = gmx_mm256_set_m128(_mm_load_ps(gbtab + _mm_extract_epi32(gbitab_hi,3)),
-                                                  _mm_load_ps(gbtab + _mm_extract_epi32(gbitab_lo,3)));
-            GMX_MM256_HALFTRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm256_mul_ps(gbeps,H);
-            Fp               = _mm256_add_ps(F,_mm256_mul_ps(gbeps,_mm256_add_ps(G,Heps)));
-            VV               = _mm256_add_ps(Y,_mm256_mul_ps(gbeps,Fp));
-            vgb              = _mm256_mul_ps(gbqqfactor,VV);
-            /*             #define INNERFLOPS INNERFLOPS+10 */
-
-            /*             #if 'Force' in KERNEL_VF */
-            FF               = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps))));
-            fgb              = _mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale));
-            dvdatmp          = _mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r{I}{J})));
-            /*                 #if ROUND == 'Epilogue' */
-            dvdatmp          = _mm256_andnot_ps(dummy_mask,dvdatmp);
-            /*                 #endif */
-            dvdasum          = _mm256_add_ps(dvdasum,dvdatmp);
-            /*                 #if ROUND == 'Loop' */
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            fjptrE           = dvda+jnrE;
-            fjptrF           = dvda+jnrF;
-            fjptrG           = dvda+jnrG;
-            fjptrH           = dvda+jnrH;
-            /*                 #else */
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            fjptrE             = (jnrlistE>=0) ? dvda+jnrE : scratch;
-            fjptrF             = (jnrlistF>=0) ? dvda+jnrF : scratch;
-            fjptrG             = (jnrlistG>=0) ? dvda+jnrG : scratch;
-            fjptrH             = (jnrlistH>=0) ? dvda+jnrH : scratch;
-            /*                 #endif */
-            gmx_mm256_increment_8real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjptrE,fjptrF,fjptrG,fjptrH,
-                                                 _mm256_mul_ps(dvdatmp,_mm256_mul_ps(isaj{J},isaj{J})));
-            /*                 #define INNERFLOPS INNERFLOPS+12 */
-            /*             #endif */
-            velec            = _mm256_mul_ps(qq{I}{J},rinv{I}{J});
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #if 'Force' in KERNEL_VF */
-            felec            = _mm256_mul_ps(_mm256_sub_ps(_mm256_mul_ps(velec,rinv{I}{J}),fgb),rinv{I}{J});
-            /*                 #define INNERFLOPS INNERFLOPS+3 */
-            /*             #endif */
-
             /*         #elif KERNEL_ELEC=='Ewald' */
             /* EWALD ELECTROSTATICS */
             
@@ -1005,17 +902,6 @@ void
             /*             #endif */
             velecsum         = _mm256_add_ps(velecsum,velec);
             /*             #define INNERFLOPS INNERFLOPS+1 */
-            /*             #if KERNEL_ELEC=='GeneralizedBorn' */
-            /*             #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */
-            vgb              = _mm256_and_ps(vgb,cutoff_mask);
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #endif                                       */
-            /*             #if ROUND == 'Epilogue' */
-            vgb              = _mm256_andnot_ps(dummy_mask,vgb);
-            /*             #endif */
-            vgbsum           = _mm256_add_ps(vgbsum,vgb);
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #endif */
             /*         #endif */
             /*         #if 'vdw' in INTERACTION_FLAGS[I][J] */
             /*     ## Note special check for TIP4P-TIP4P. Since we are cutting of all hydrogen interactions we also cut the LJ-only O-O interaction */
@@ -1185,19 +1071,11 @@ void
         gmx_mm256_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid);
         /*         #define OUTERFLOPS OUTERFLOPS+1 */
         /*     #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC */
-        gmx_mm256_update_1pot_ps(vgbsum,kernel_data->energygrp_polarization+ggid);
-        /*         #define OUTERFLOPS OUTERFLOPS+1 */
-        /*     #endif */
         /*     #if KERNEL_VDW != 'None' */
         gmx_mm256_update_1pot_ps(vvdwsum,kernel_data->energygrp_vdw+ggid);
         /*         #define OUTERFLOPS OUTERFLOPS+1 */
         /*     #endif */
         /* #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */
-        dvdasum = _mm256_mul_ps(dvdasum, _mm256_mul_ps(isai{I},isai{I}));
-        gmx_mm256_update_1pot_ps(dvdasum,dvda+inr);
-        /*     #endif */
 
         /* Increment number of inner iterations */
         inneriter                  += j_index_end - j_index_start;
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_c/make_nb_kernel_c.py b/src/gromacs/gmxlib/nonbonded/nb_kernel_c/make_nb_kernel_c.py
index 53f5022bff..bba8574011 100755
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_c/make_nb_kernel_c.py
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_c/make_nb_kernel_c.py
@@ -2,7 +2,7 @@
 #
 # This file is part of the GROMACS molecular simulation package.
 #
-# Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
+# Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 # and including many others, as listed in the AUTHORS file in the
 # top-level source directory and at http://www.gromacs.org.
@@ -105,7 +105,6 @@ ElectrostaticsList = {
     'None'                    : [],
     'Coulomb'                 : ['rinv','rinvsq'],
     'ReactionField'           : ['rinv','rinvsq'],
-    'GeneralizedBorn'         : ['rinv','r'],
     'CubicSplineTable'        : ['rinv','r','table'],
     'Ewald'                   : ['rinv','rinvsq','r'],
 }
@@ -190,7 +189,6 @@ Abbreviation = {
     'Coulomb'                 : 'Coul',
     'Ewald'                   : 'Ew',
     'ReactionField'           : 'RF',
-    'GeneralizedBorn'         : 'GB',
     'CubicSplineTable'        : 'CSTab',
     'LennardJones'            : 'LJ',
     'Buckingham'              : 'Bham',
@@ -269,7 +267,7 @@ def KeepKernel(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,Kernel
         return 0
 
-    # No need for LJ-only water optimization, or water optimization with implicit solvent.
+    # No need for LJ-only water optimization.
-    if('Water' in KernelGeom[0] and (KernelElec=='None' or 'GeneralizedBorn' in KernelElec)):
+    if('Water' in KernelGeom[0] and KernelElec=='None'):
         return 0
 
     # Non-matching table settings are pointless
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwBham_GeomP1P1_c.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwBham_GeomP1P1_c.c
deleted file mode 100644
index 343dd09dc4..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwBham_GeomP1P1_c.c
+++ /dev/null
@@ -1,505 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014.2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS c kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwBham_GeomP1P1_VF_c
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            Buckingham
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecGB_VdwBham_GeomP1P1_VF_c
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    int              i_shift_offset,i_coord_offset,j_coord_offset;
-    int              j_index_start,j_index_end;
-    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
-    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             *shiftvec,*fshift,*x,*f;
-    int              vdwioffset0;
-    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0;
-    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
-    real             velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              gbitab;
-    real             vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    int              vfitab;
-    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
-    real             *vftab;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = fr->ic->epsfac;
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = fr->gbtab->scale;
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = (1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent);
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-        shX              = shiftvec[i_shift_offset+XX];
-        shY              = shiftvec[i_shift_offset+YY];
-        shZ              = shiftvec[i_shift_offset+ZZ];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        ix0              = shX + x[i_coord_offset+DIM*0+XX];
-        iy0              = shY + x[i_coord_offset+DIM*0+YY];
-        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
-
-        fix0             = 0.0;
-        fiy0             = 0.0;
-        fiz0             = 0.0;
-
-        /* Load parameters for i particles */
-        iq0              = facel*charge[inr+0];
-        isai0            = invsqrta[inr+0];
-        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = 0.0;
-        vgbsum           = 0.0;
-        vvdwsum          = 0.0;
-        dvdasum          = 0.0;
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end; jidx++)
-        {
-            /* Get j neighbor index, and coordinate index */
-            jnr              = jjnr[jidx];
-            j_coord_offset   = DIM*jnr;
-
-            /* load j atom coordinates */
-            jx0              = x[j_coord_offset+DIM*0+XX];
-            jy0              = x[j_coord_offset+DIM*0+YY];
-            jz0              = x[j_coord_offset+DIM*0+ZZ];
-
-            /* Calculate displacement vector */
-            dx00             = ix0 - jx0;
-            dy00             = iy0 - jy0;
-            dz00             = iz0 - jz0;
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
-
-            rinv00           = 1.0/sqrt(rsq00);
-
-            rinvsq00         = rinv00*rinv00;
-
-            /* Load parameters for j particles */
-            jq0              = charge[jnr+0];
-            isaj0           = invsqrta[jnr+0];
-            vdwjidx0         = 3*vdwtype[jnr+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = rsq00*rinv00;
-
-            qq00             = iq0*jq0;
-            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
-            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
-            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = isai0*isaj0;
-            gbqqfactor       = isaprod*(-qq00)*gbinvepsdiff;
-            gbscale          = isaprod*gbtabscale;
-            dvdaj            = dvda[jnr+0];
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = r00*gbscale;
-            gbitab           = rt;
-            gbeps            = rt-gbitab;
-            gbitab           = 4*gbitab;
-
-            Y                = gbtab[gbitab];
-            F                = gbtab[gbitab+1];
-            Geps             = gbeps*gbtab[gbitab+2];
-            Heps2            = gbeps*gbeps*gbtab[gbitab+3];
-            Fp               = F+Geps+Heps2;
-            VV               = Y+gbeps*Fp;
-            vgb              = gbqqfactor*VV;
-
-            FF               = Fp+Geps+2.0*Heps2;
-            fgb              = gbqqfactor*FF*gbscale;
-            dvdatmp          = -0.5*(vgb+fgb*r00);
-            dvdasum          = dvdasum + dvdatmp;
-            dvda[jnr]        = dvdaj+dvdatmp*isaj0*isaj0;
-            velec            = qq00*rinv00;
-            felec            = (velec*rinv00-fgb)*rinv00;
-
-            /* BUCKINGHAM DISPERSION/REPULSION */
-            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
-            vvdw6            = c6_00*rinvsix;
-            br               = cexp2_00*r00;
-            vvdwexp          = cexp1_00*exp(-br);
-            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
-            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
-
-            /* Update potential sums from outer loop */
-            velecsum        += velec;
-            vgbsum          += vgb;
-            vvdwsum         += vvdw;
-
-            fscal            = felec+fvdw;
-
-            /* Calculate temporary vectorial force */
-            tx               = fscal*dx00;
-            ty               = fscal*dy00;
-            tz               = fscal*dz00;
-
-            /* Update vectorial force */
-            fix0            += tx;
-            fiy0            += ty;
-            fiz0            += tz;
-            f[j_coord_offset+DIM*0+XX] -= tx;
-            f[j_coord_offset+DIM*0+YY] -= ty;
-            f[j_coord_offset+DIM*0+ZZ] -= tz;
-
-            /* Inner loop uses 97 flops */
-        }
-        /* End of innermost loop */
-
-        tx = ty = tz = 0;
-        f[i_coord_offset+DIM*0+XX] += fix0;
-        f[i_coord_offset+DIM*0+YY] += fiy0;
-        f[i_coord_offset+DIM*0+ZZ] += fiz0;
-        tx                         += fix0;
-        ty                         += fiy0;
-        tz                         += fiz0;
-        fshift[i_shift_offset+XX]  += tx;
-        fshift[i_shift_offset+YY]  += ty;
-        fshift[i_shift_offset+ZZ]  += tz;
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        kernel_data->energygrp_elec[ggid] += velecsum;
-        kernel_data->energygrp_polarization[ggid] += vgbsum;
-        kernel_data->energygrp_vdw[ggid] += vvdwsum;
-        dvda[inr]                   = dvda[inr] + dvdasum*isai0*isai0;
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 16 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*16 + inneriter*97);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwBham_GeomP1P1_F_c
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            Buckingham
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecGB_VdwBham_GeomP1P1_F_c
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    int              i_shift_offset,i_coord_offset,j_coord_offset;
-    int              j_index_start,j_index_end;
-    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
-    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             *shiftvec,*fshift,*x,*f;
-    int              vdwioffset0;
-    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0;
-    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
-    real             velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              gbitab;
-    real             vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    int              vfitab;
-    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
-    real             *vftab;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = fr->ic->epsfac;
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = fr->gbtab->scale;
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = (1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent);
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-        shX              = shiftvec[i_shift_offset+XX];
-        shY              = shiftvec[i_shift_offset+YY];
-        shZ              = shiftvec[i_shift_offset+ZZ];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        ix0              = shX + x[i_coord_offset+DIM*0+XX];
-        iy0              = shY + x[i_coord_offset+DIM*0+YY];
-        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
-
-        fix0             = 0.0;
-        fiy0             = 0.0;
-        fiz0             = 0.0;
-
-        /* Load parameters for i particles */
-        iq0              = facel*charge[inr+0];
-        isai0            = invsqrta[inr+0];
-        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
-
-        dvdasum          = 0.0;
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end; jidx++)
-        {
-            /* Get j neighbor index, and coordinate index */
-            jnr              = jjnr[jidx];
-            j_coord_offset   = DIM*jnr;
-
-            /* load j atom coordinates */
-            jx0              = x[j_coord_offset+DIM*0+XX];
-            jy0              = x[j_coord_offset+DIM*0+YY];
-            jz0              = x[j_coord_offset+DIM*0+ZZ];
-
-            /* Calculate displacement vector */
-            dx00             = ix0 - jx0;
-            dy00             = iy0 - jy0;
-            dz00             = iz0 - jz0;
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
-
-            rinv00           = 1.0/sqrt(rsq00);
-
-            rinvsq00         = rinv00*rinv00;
-
-            /* Load parameters for j particles */
-            jq0              = charge[jnr+0];
-            isaj0           = invsqrta[jnr+0];
-            vdwjidx0         = 3*vdwtype[jnr+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = rsq00*rinv00;
-
-            qq00             = iq0*jq0;
-            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
-            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
-            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = isai0*isaj0;
-            gbqqfactor       = isaprod*(-qq00)*gbinvepsdiff;
-            gbscale          = isaprod*gbtabscale;
-            dvdaj            = dvda[jnr+0];
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = r00*gbscale;
-            gbitab           = rt;
-            gbeps            = rt-gbitab;
-            gbitab           = 4*gbitab;
-
-            Y                = gbtab[gbitab];
-            F                = gbtab[gbitab+1];
-            Geps             = gbeps*gbtab[gbitab+2];
-            Heps2            = gbeps*gbeps*gbtab[gbitab+3];
-            Fp               = F+Geps+Heps2;
-            VV               = Y+gbeps*Fp;
-            vgb              = gbqqfactor*VV;
-
-            FF               = Fp+Geps+2.0*Heps2;
-            fgb              = gbqqfactor*FF*gbscale;
-            dvdatmp          = -0.5*(vgb+fgb*r00);
-            dvdasum          = dvdasum + dvdatmp;
-            dvda[jnr]        = dvdaj+dvdatmp*isaj0*isaj0;
-            velec            = qq00*rinv00;
-            felec            = (velec*rinv00-fgb)*rinv00;
-
-            /* BUCKINGHAM DISPERSION/REPULSION */
-            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
-            vvdw6            = c6_00*rinvsix;
-            br               = cexp2_00*r00;
-            vvdwexp          = cexp1_00*exp(-br);
-            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
-
-            fscal            = felec+fvdw;
-
-            /* Calculate temporary vectorial force */
-            tx               = fscal*dx00;
-            ty               = fscal*dy00;
-            tz               = fscal*dz00;
-
-            /* Update vectorial force */
-            fix0            += tx;
-            fiy0            += ty;
-            fiz0            += tz;
-            f[j_coord_offset+DIM*0+XX] -= tx;
-            f[j_coord_offset+DIM*0+YY] -= ty;
-            f[j_coord_offset+DIM*0+ZZ] -= tz;
-
-            /* Inner loop uses 92 flops */
-        }
-        /* End of innermost loop */
-
-        tx = ty = tz = 0;
-        f[i_coord_offset+DIM*0+XX] += fix0;
-        f[i_coord_offset+DIM*0+YY] += fiy0;
-        f[i_coord_offset+DIM*0+ZZ] += fiz0;
-        tx                         += fix0;
-        ty                         += fiy0;
-        tz                         += fiz0;
-        fshift[i_shift_offset+XX]  += tx;
-        fshift[i_shift_offset+YY]  += ty;
-        fshift[i_shift_offset+ZZ]  += tz;
-
-        dvda[inr]                   = dvda[inr] + dvdasum*isai0*isai0;
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 13 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*92);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_c.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_c.c
deleted file mode 100644
index 83a453f291..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_c.c
+++ /dev/null
@@ -1,545 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014.2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS c kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_c
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_c
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    int              i_shift_offset,i_coord_offset,j_coord_offset;
-    int              j_index_start,j_index_end;
-    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
-    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             *shiftvec,*fshift,*x,*f;
-    int              vdwioffset0;
-    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0;
-    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
-    real             velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              gbitab;
-    real             vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    int              vfitab;
-    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
-    real             *vftab;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = fr->ic->epsfac;
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = kernel_data->table_vdw->scale;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = fr->gbtab->scale;
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = (1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent);
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-        shX              = shiftvec[i_shift_offset+XX];
-        shY              = shiftvec[i_shift_offset+YY];
-        shZ              = shiftvec[i_shift_offset+ZZ];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        ix0              = shX + x[i_coord_offset+DIM*0+XX];
-        iy0              = shY + x[i_coord_offset+DIM*0+YY];
-        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
-
-        fix0             = 0.0;
-        fiy0             = 0.0;
-        fiz0             = 0.0;
-
-        /* Load parameters for i particles */
-        iq0              = facel*charge[inr+0];
-        isai0            = invsqrta[inr+0];
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = 0.0;
-        vgbsum           = 0.0;
-        vvdwsum          = 0.0;
-        dvdasum          = 0.0;
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end; jidx++)
-        {
-            /* Get j neighbor index, and coordinate index */
-            jnr              = jjnr[jidx];
-            j_coord_offset   = DIM*jnr;
-
-            /* load j atom coordinates */
-            jx0              = x[j_coord_offset+DIM*0+XX];
-            jy0              = x[j_coord_offset+DIM*0+YY];
-            jz0              = x[j_coord_offset+DIM*0+ZZ];
-
-            /* Calculate displacement vector */
-            dx00             = ix0 - jx0;
-            dy00             = iy0 - jy0;
-            dz00             = iz0 - jz0;
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
-
-            rinv00           = 1.0/sqrt(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = charge[jnr+0];
-            isaj0           = invsqrta[jnr+0];
-            vdwjidx0         = 2*vdwtype[jnr+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = rsq00*rinv00;
-
-            qq00             = iq0*jq0;
-            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
-            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = r00*vftabscale;
-            vfitab           = rt;
-            vfeps            = rt-vfitab;
-            vfitab           = 2*4*vfitab;
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = isai0*isaj0;
-            gbqqfactor       = isaprod*(-qq00)*gbinvepsdiff;
-            gbscale          = isaprod*gbtabscale;
-            dvdaj            = dvda[jnr+0];
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = r00*gbscale;
-            gbitab           = rt;
-            gbeps            = rt-gbitab;
-            gbitab           = 4*gbitab;
-
-            Y                = gbtab[gbitab];
-            F                = gbtab[gbitab+1];
-            Geps             = gbeps*gbtab[gbitab+2];
-            Heps2            = gbeps*gbeps*gbtab[gbitab+3];
-            Fp               = F+Geps+Heps2;
-            VV               = Y+gbeps*Fp;
-            vgb              = gbqqfactor*VV;
-
-            FF               = Fp+Geps+2.0*Heps2;
-            fgb              = gbqqfactor*FF*gbscale;
-            dvdatmp          = -0.5*(vgb+fgb*r00);
-            dvdasum          = dvdasum + dvdatmp;
-            dvda[jnr]        = dvdaj+dvdatmp*isaj0*isaj0;
-            velec            = qq00*rinv00;
-            felec            = (velec*rinv00-fgb)*rinv00;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            vfitab          += 0;
-            Y                = vftab[vfitab];
-            F                = vftab[vfitab+1];
-            Geps             = vfeps*vftab[vfitab+2];
-            Heps2            = vfeps*vfeps*vftab[vfitab+3];
-            Fp               = F+Geps+Heps2;
-            VV               = Y+vfeps*Fp;
-            vvdw6            = c6_00*VV;
-            FF               = Fp+Geps+2.0*Heps2;
-            fvdw6            = c6_00*FF;
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = vftab[vfitab+4];
-            F                = vftab[vfitab+5];
-            Geps             = vfeps*vftab[vfitab+6];
-            Heps2            = vfeps*vfeps*vftab[vfitab+7];
-            Fp               = F+Geps+Heps2;
-            VV               = Y+vfeps*Fp;
-            vvdw12           = c12_00*VV;
-            FF               = Fp+Geps+2.0*Heps2;
-            fvdw12           = c12_00*FF;
-            vvdw             = vvdw12+vvdw6;
-            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
-
-            /* Update potential sums from outer loop */
-            velecsum        += velec;
-            vgbsum          += vgb;
-            vvdwsum         += vvdw;
-
-            fscal            = felec+fvdw;
-
-            /* Calculate temporary vectorial force */
-            tx               = fscal*dx00;
-            ty               = fscal*dy00;
-            tz               = fscal*dz00;
-
-            /* Update vectorial force */
-            fix0            += tx;
-            fiy0            += ty;
-            fiz0            += tz;
-            f[j_coord_offset+DIM*0+XX] -= tx;
-            f[j_coord_offset+DIM*0+YY] -= ty;
-            f[j_coord_offset+DIM*0+ZZ] -= tz;
-
-            /* Inner loop uses 91 flops */
-        }
-        /* End of innermost loop */
-
-        tx = ty = tz = 0;
-        f[i_coord_offset+DIM*0+XX] += fix0;
-        f[i_coord_offset+DIM*0+YY] += fiy0;
-        f[i_coord_offset+DIM*0+ZZ] += fiz0;
-        tx                         += fix0;
-        ty                         += fiy0;
-        tz                         += fiz0;
-        fshift[i_shift_offset+XX]  += tx;
-        fshift[i_shift_offset+YY]  += ty;
-        fshift[i_shift_offset+ZZ]  += tz;
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        kernel_data->energygrp_elec[ggid] += velecsum;
-        kernel_data->energygrp_polarization[ggid] += vgbsum;
-        kernel_data->energygrp_vdw[ggid] += vvdwsum;
-        dvda[inr]                   = dvda[inr] + dvdasum*isai0*isai0;
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 16 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*16 + inneriter*91);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_c
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_c
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    int              i_shift_offset,i_coord_offset,j_coord_offset;
-    int              j_index_start,j_index_end;
-    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
-    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             *shiftvec,*fshift,*x,*f;
-    int              vdwioffset0;
-    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0;
-    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
-    real             velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              gbitab;
-    real             vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    int              vfitab;
-    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
-    real             *vftab;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = fr->ic->epsfac;
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = kernel_data->table_vdw->scale;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = fr->gbtab->scale;
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = (1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent);
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-        shX              = shiftvec[i_shift_offset+XX];
-        shY              = shiftvec[i_shift_offset+YY];
-        shZ              = shiftvec[i_shift_offset+ZZ];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        ix0              = shX + x[i_coord_offset+DIM*0+XX];
-        iy0              = shY + x[i_coord_offset+DIM*0+YY];
-        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
-
-        fix0             = 0.0;
-        fiy0             = 0.0;
-        fiz0             = 0.0;
-
-        /* Load parameters for i particles */
-        iq0              = facel*charge[inr+0];
-        isai0            = invsqrta[inr+0];
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        dvdasum          = 0.0;
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end; jidx++)
-        {
-            /* Get j neighbor index, and coordinate index */
-            jnr              = jjnr[jidx];
-            j_coord_offset   = DIM*jnr;
-
-            /* load j atom coordinates */
-            jx0              = x[j_coord_offset+DIM*0+XX];
-            jy0              = x[j_coord_offset+DIM*0+YY];
-            jz0              = x[j_coord_offset+DIM*0+ZZ];
-
-            /* Calculate displacement vector */
-            dx00             = ix0 - jx0;
-            dy00             = iy0 - jy0;
-            dz00             = iz0 - jz0;
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
-
-            rinv00           = 1.0/sqrt(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = charge[jnr+0];
-            isaj0           = invsqrta[jnr+0];
-            vdwjidx0         = 2*vdwtype[jnr+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = rsq00*rinv00;
-
-            qq00             = iq0*jq0;
-            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
-            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = r00*vftabscale;
-            vfitab           = rt;
-            vfeps            = rt-vfitab;
-            vfitab           = 2*4*vfitab;
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = isai0*isaj0;
-            gbqqfactor       = isaprod*(-qq00)*gbinvepsdiff;
-            gbscale          = isaprod*gbtabscale;
-            dvdaj            = dvda[jnr+0];
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = r00*gbscale;
-            gbitab           = rt;
-            gbeps            = rt-gbitab;
-            gbitab           = 4*gbitab;
-
-            Y                = gbtab[gbitab];
-            F                = gbtab[gbitab+1];
-            Geps             = gbeps*gbtab[gbitab+2];
-            Heps2            = gbeps*gbeps*gbtab[gbitab+3];
-            Fp               = F+Geps+Heps2;
-            VV               = Y+gbeps*Fp;
-            vgb              = gbqqfactor*VV;
-
-            FF               = Fp+Geps+2.0*Heps2;
-            fgb              = gbqqfactor*FF*gbscale;
-            dvdatmp          = -0.5*(vgb+fgb*r00);
-            dvdasum          = dvdasum + dvdatmp;
-            dvda[jnr]        = dvdaj+dvdatmp*isaj0*isaj0;
-            velec            = qq00*rinv00;
-            felec            = (velec*rinv00-fgb)*rinv00;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            vfitab          += 0;
-            F                = vftab[vfitab+1];
-            Geps             = vfeps*vftab[vfitab+2];
-            Heps2            = vfeps*vfeps*vftab[vfitab+3];
-            Fp               = F+Geps+Heps2;
-            FF               = Fp+Geps+2.0*Heps2;
-            fvdw6            = c6_00*FF;
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            F                = vftab[vfitab+5];
-            Geps             = vfeps*vftab[vfitab+6];
-            Heps2            = vfeps*vfeps*vftab[vfitab+7];
-            Fp               = F+Geps+Heps2;
-            FF               = Fp+Geps+2.0*Heps2;
-            fvdw12           = c12_00*FF;
-            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
-
-            fscal            = felec+fvdw;
-
-            /* Calculate temporary vectorial force */
-            tx               = fscal*dx00;
-            ty               = fscal*dy00;
-            tz               = fscal*dz00;
-
-            /* Update vectorial force */
-            fix0            += tx;
-            fiy0            += ty;
-            fiz0            += tz;
-            f[j_coord_offset+DIM*0+XX] -= tx;
-            f[j_coord_offset+DIM*0+YY] -= ty;
-            f[j_coord_offset+DIM*0+ZZ] -= tz;
-
-            /* Inner loop uses 81 flops */
-        }
-        /* End of innermost loop */
-
-        tx = ty = tz = 0;
-        f[i_coord_offset+DIM*0+XX] += fix0;
-        f[i_coord_offset+DIM*0+YY] += fiy0;
-        f[i_coord_offset+DIM*0+ZZ] += fiz0;
-        tx                         += fix0;
-        ty                         += fiy0;
-        tz                         += fiz0;
-        fshift[i_shift_offset+XX]  += tx;
-        fshift[i_shift_offset+YY]  += ty;
-        fshift[i_shift_offset+ZZ]  += tz;
-
-        dvda[inr]                   = dvda[inr] + dvdasum*isai0*isai0;
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 13 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*81);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwLJ_GeomP1P1_c.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwLJ_GeomP1P1_c.c
deleted file mode 100644
index bef7707439..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwLJ_GeomP1P1_c.c
+++ /dev/null
@@ -1,501 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014.2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS c kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_c
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_c
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    int              i_shift_offset,i_coord_offset,j_coord_offset;
-    int              j_index_start,j_index_end;
-    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
-    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             *shiftvec,*fshift,*x,*f;
-    int              vdwioffset0;
-    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0;
-    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
-    real             velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              gbitab;
-    real             vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    int              vfitab;
-    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
-    real             *vftab;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = fr->ic->epsfac;
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = fr->gbtab->scale;
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = (1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent);
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-        shX              = shiftvec[i_shift_offset+XX];
-        shY              = shiftvec[i_shift_offset+YY];
-        shZ              = shiftvec[i_shift_offset+ZZ];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        ix0              = shX + x[i_coord_offset+DIM*0+XX];
-        iy0              = shY + x[i_coord_offset+DIM*0+YY];
-        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
-
-        fix0             = 0.0;
-        fiy0             = 0.0;
-        fiz0             = 0.0;
-
-        /* Load parameters for i particles */
-        iq0              = facel*charge[inr+0];
-        isai0            = invsqrta[inr+0];
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = 0.0;
-        vgbsum           = 0.0;
-        vvdwsum          = 0.0;
-        dvdasum          = 0.0;
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end; jidx++)
-        {
-            /* Get j neighbor index, and coordinate index */
-            jnr              = jjnr[jidx];
-            j_coord_offset   = DIM*jnr;
-
-            /* load j atom coordinates */
-            jx0              = x[j_coord_offset+DIM*0+XX];
-            jy0              = x[j_coord_offset+DIM*0+YY];
-            jz0              = x[j_coord_offset+DIM*0+ZZ];
-
-            /* Calculate displacement vector */
-            dx00             = ix0 - jx0;
-            dy00             = iy0 - jy0;
-            dz00             = iz0 - jz0;
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
-
-            rinv00           = 1.0/sqrt(rsq00);
-
-            rinvsq00         = rinv00*rinv00;
-
-            /* Load parameters for j particles */
-            jq0              = charge[jnr+0];
-            isaj0           = invsqrta[jnr+0];
-            vdwjidx0         = 2*vdwtype[jnr+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = rsq00*rinv00;
-
-            qq00             = iq0*jq0;
-            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
-            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = isai0*isaj0;
-            gbqqfactor       = isaprod*(-qq00)*gbinvepsdiff;
-            gbscale          = isaprod*gbtabscale;
-            dvdaj            = dvda[jnr+0];
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = r00*gbscale;
-            gbitab           = rt;
-            gbeps            = rt-gbitab;
-            gbitab           = 4*gbitab;
-
-            Y                = gbtab[gbitab];
-            F                = gbtab[gbitab+1];
-            Geps             = gbeps*gbtab[gbitab+2];
-            Heps2            = gbeps*gbeps*gbtab[gbitab+3];
-            Fp               = F+Geps+Heps2;
-            VV               = Y+gbeps*Fp;
-            vgb              = gbqqfactor*VV;
-
-            FF               = Fp+Geps+2.0*Heps2;
-            fgb              = gbqqfactor*FF*gbscale;
-            dvdatmp          = -0.5*(vgb+fgb*r00);
-            dvdasum          = dvdasum + dvdatmp;
-            dvda[jnr]        = dvdaj+dvdatmp*isaj0*isaj0;
-            velec            = qq00*rinv00;
-            felec            = (velec*rinv00-fgb)*rinv00;
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
-            vvdw6            = c6_00*rinvsix;
-            vvdw12           = c12_00*rinvsix*rinvsix;
-            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
-            fvdw             = (vvdw12-vvdw6)*rinvsq00;
-
-            /* Update potential sums from outer loop */
-            velecsum        += velec;
-            vgbsum          += vgb;
-            vvdwsum         += vvdw;
-
-            fscal            = felec+fvdw;
-
-            /* Calculate temporary vectorial force */
-            tx               = fscal*dx00;
-            ty               = fscal*dy00;
-            tz               = fscal*dz00;
-
-            /* Update vectorial force */
-            fix0            += tx;
-            fiy0            += ty;
-            fiz0            += tz;
-            f[j_coord_offset+DIM*0+XX] -= tx;
-            f[j_coord_offset+DIM*0+YY] -= ty;
-            f[j_coord_offset+DIM*0+ZZ] -= tz;
-
-            /* Inner loop uses 71 flops */
-        }
-        /* End of innermost loop */
-
-        tx = ty = tz = 0;
-        f[i_coord_offset+DIM*0+XX] += fix0;
-        f[i_coord_offset+DIM*0+YY] += fiy0;
-        f[i_coord_offset+DIM*0+ZZ] += fiz0;
-        tx                         += fix0;
-        ty                         += fiy0;
-        tz                         += fiz0;
-        fshift[i_shift_offset+XX]  += tx;
-        fshift[i_shift_offset+YY]  += ty;
-        fshift[i_shift_offset+ZZ]  += tz;
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        kernel_data->energygrp_elec[ggid] += velecsum;
-        kernel_data->energygrp_polarization[ggid] += vgbsum;
-        kernel_data->energygrp_vdw[ggid] += vvdwsum;
-        dvda[inr]                   = dvda[inr] + dvdasum*isai0*isai0;
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 16 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*16 + inneriter*71);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_c
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_c
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    int              i_shift_offset,i_coord_offset,j_coord_offset;
-    int              j_index_start,j_index_end;
-    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
-    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             *shiftvec,*fshift,*x,*f;
-    int              vdwioffset0;
-    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0;
-    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
-    real             velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              gbitab;
-    real             vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    int              vfitab;
-    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
-    real             *vftab;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = fr->ic->epsfac;
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = fr->gbtab->scale;
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = (1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent);
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-        shX              = shiftvec[i_shift_offset+XX];
-        shY              = shiftvec[i_shift_offset+YY];
-        shZ              = shiftvec[i_shift_offset+ZZ];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        ix0              = shX + x[i_coord_offset+DIM*0+XX];
-        iy0              = shY + x[i_coord_offset+DIM*0+YY];
-        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
-
-        fix0             = 0.0;
-        fiy0             = 0.0;
-        fiz0             = 0.0;
-
-        /* Load parameters for i particles */
-        iq0              = facel*charge[inr+0];
-        isai0            = invsqrta[inr+0];
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        dvdasum          = 0.0;
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end; jidx++)
-        {
-            /* Get j neighbor index, and coordinate index */
-            jnr              = jjnr[jidx];
-            j_coord_offset   = DIM*jnr;
-
-            /* load j atom coordinates */
-            jx0              = x[j_coord_offset+DIM*0+XX];
-            jy0              = x[j_coord_offset+DIM*0+YY];
-            jz0              = x[j_coord_offset+DIM*0+ZZ];
-
-            /* Calculate displacement vector */
-            dx00             = ix0 - jx0;
-            dy00             = iy0 - jy0;
-            dz00             = iz0 - jz0;
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
-
-            rinv00           = 1.0/sqrt(rsq00);
-
-            rinvsq00         = rinv00*rinv00;
-
-            /* Load parameters for j particles */
-            jq0              = charge[jnr+0];
-            isaj0           = invsqrta[jnr+0];
-            vdwjidx0         = 2*vdwtype[jnr+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = rsq00*rinv00;
-
-            qq00             = iq0*jq0;
-            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
-            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = isai0*isaj0;
-            gbqqfactor       = isaprod*(-qq00)*gbinvepsdiff;
-            gbscale          = isaprod*gbtabscale;
-            dvdaj            = dvda[jnr+0];
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = r00*gbscale;
-            gbitab           = rt;
-            gbeps            = rt-gbitab;
-            gbitab           = 4*gbitab;
-
-            Y                = gbtab[gbitab];
-            F                = gbtab[gbitab+1];
-            Geps             = gbeps*gbtab[gbitab+2];
-            Heps2            = gbeps*gbeps*gbtab[gbitab+3];
-            Fp               = F+Geps+Heps2;
-            VV               = Y+gbeps*Fp;
-            vgb              = gbqqfactor*VV;
-
-            FF               = Fp+Geps+2.0*Heps2;
-            fgb              = gbqqfactor*FF*gbscale;
-            dvdatmp          = -0.5*(vgb+fgb*r00);
-            dvdasum          = dvdasum + dvdatmp;
-            dvda[jnr]        = dvdaj+dvdatmp*isaj0*isaj0;
-            velec            = qq00*rinv00;
-            felec            = (velec*rinv00-fgb)*rinv00;
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
-            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
-
-            fscal            = felec+fvdw;
-
-            /* Calculate temporary vectorial force */
-            tx               = fscal*dx00;
-            ty               = fscal*dy00;
-            tz               = fscal*dz00;
-
-            /* Update vectorial force */
-            fix0            += tx;
-            fiy0            += ty;
-            fiz0            += tz;
-            f[j_coord_offset+DIM*0+XX] -= tx;
-            f[j_coord_offset+DIM*0+YY] -= ty;
-            f[j_coord_offset+DIM*0+ZZ] -= tz;
-
-            /* Inner loop uses 64 flops */
-        }
-        /* End of innermost loop */
-
-        tx = ty = tz = 0;
-        f[i_coord_offset+DIM*0+XX] += fix0;
-        f[i_coord_offset+DIM*0+YY] += fiy0;
-        f[i_coord_offset+DIM*0+ZZ] += fiz0;
-        tx                         += fix0;
-        ty                         += fiy0;
-        tz                         += fiz0;
-        fshift[i_shift_offset+XX]  += tx;
-        fshift[i_shift_offset+YY]  += ty;
-        fshift[i_shift_offset+ZZ]  += tz;
-
-        dvda[inr]                   = dvda[inr] + dvdasum*isai0*isai0;
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 13 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*64);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwNone_GeomP1P1_c.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwNone_GeomP1P1_c.c
deleted file mode 100644
index b803724508..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwNone_GeomP1P1_c.c
+++ /dev/null
@@ -1,459 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014.2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS c kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_c
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_c
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    int              i_shift_offset,i_coord_offset,j_coord_offset;
-    int              j_index_start,j_index_end;
-    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
-    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             *shiftvec,*fshift,*x,*f;
-    int              vdwioffset0;
-    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0;
-    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
-    real             velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              gbitab;
-    real             vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
-    real             *invsqrta,*dvda,*gbtab;
-    int              vfitab;
-    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
-    real             *vftab;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = fr->ic->epsfac;
-    charge           = mdatoms->chargeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = fr->gbtab->scale;
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = (1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent);
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-        shX              = shiftvec[i_shift_offset+XX];
-        shY              = shiftvec[i_shift_offset+YY];
-        shZ              = shiftvec[i_shift_offset+ZZ];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        ix0              = shX + x[i_coord_offset+DIM*0+XX];
-        iy0              = shY + x[i_coord_offset+DIM*0+YY];
-        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
-
-        fix0             = 0.0;
-        fiy0             = 0.0;
-        fiz0             = 0.0;
-
-        /* Load parameters for i particles */
-        iq0              = facel*charge[inr+0];
-        isai0            = invsqrta[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = 0.0;
-        vgbsum           = 0.0;
-        dvdasum          = 0.0;
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end; jidx++)
-        {
-            /* Get j neighbor index, and coordinate index */
-            jnr              = jjnr[jidx];
-            j_coord_offset   = DIM*jnr;
-
-            /* load j atom coordinates */
-            jx0              = x[j_coord_offset+DIM*0+XX];
-            jy0              = x[j_coord_offset+DIM*0+YY];
-            jz0              = x[j_coord_offset+DIM*0+ZZ];
-
-            /* Calculate displacement vector */
-            dx00             = ix0 - jx0;
-            dy00             = iy0 - jy0;
-            dz00             = iz0 - jz0;
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
-
-            rinv00           = 1.0/sqrt(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = charge[jnr+0];
-            isaj0           = invsqrta[jnr+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = rsq00*rinv00;
-
-            qq00             = iq0*jq0;
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = isai0*isaj0;
-            gbqqfactor       = isaprod*(-qq00)*gbinvepsdiff;
-            gbscale          = isaprod*gbtabscale;
-            dvdaj            = dvda[jnr+0];
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = r00*gbscale;
-            gbitab           = rt;
-            gbeps            = rt-gbitab;
-            gbitab           = 4*gbitab;
-
-            Y                = gbtab[gbitab];
-            F                = gbtab[gbitab+1];
-            Geps             = gbeps*gbtab[gbitab+2];
-            Heps2            = gbeps*gbeps*gbtab[gbitab+3];
-            Fp               = F+Geps+Heps2;
-            VV               = Y+gbeps*Fp;
-            vgb              = gbqqfactor*VV;
-
-            FF               = Fp+Geps+2.0*Heps2;
-            fgb              = gbqqfactor*FF*gbscale;
-            dvdatmp          = -0.5*(vgb+fgb*r00);
-            dvdasum          = dvdasum + dvdatmp;
-            dvda[jnr]        = dvdaj+dvdatmp*isaj0*isaj0;
-            velec            = qq00*rinv00;
-            felec            = (velec*rinv00-fgb)*rinv00;
-
-            /* Update potential sums from outer loop */
-            velecsum        += velec;
-            vgbsum          += vgb;
-
-            fscal            = felec;
-
-            /* Calculate temporary vectorial force */
-            tx               = fscal*dx00;
-            ty               = fscal*dy00;
-            tz               = fscal*dz00;
-
-            /* Update vectorial force */
-            fix0            += tx;
-            fiy0            += ty;
-            fiz0            += tz;
-            f[j_coord_offset+DIM*0+XX] -= tx;
-            f[j_coord_offset+DIM*0+YY] -= ty;
-            f[j_coord_offset+DIM*0+ZZ] -= tz;
-
-            /* Inner loop uses 58 flops */
-        }
-        /* End of innermost loop */
-
-        tx = ty = tz = 0;
-        f[i_coord_offset+DIM*0+XX] += fix0;
-        f[i_coord_offset+DIM*0+YY] += fiy0;
-        f[i_coord_offset+DIM*0+ZZ] += fiz0;
-        tx                         += fix0;
-        ty                         += fiy0;
-        tz                         += fiz0;
-        fshift[i_shift_offset+XX]  += tx;
-        fshift[i_shift_offset+YY]  += ty;
-        fshift[i_shift_offset+ZZ]  += tz;
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        kernel_data->energygrp_elec[ggid] += velecsum;
-        kernel_data->energygrp_polarization[ggid] += vgbsum;
-        dvda[inr]                   = dvda[inr] + dvdasum*isai0*isai0;
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 15 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*15 + inneriter*58);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwNone_GeomP1P1_F_c
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecGB_VdwNone_GeomP1P1_F_c
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    int              i_shift_offset,i_coord_offset,j_coord_offset;
-    int              j_index_start,j_index_end;
-    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
-    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             *shiftvec,*fshift,*x,*f;
-    int              vdwioffset0;
-    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0;
-    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
-    real             velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              gbitab;
-    real             vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
-    real             *invsqrta,*dvda,*gbtab;
-    int              vfitab;
-    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
-    real             *vftab;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = fr->ic->epsfac;
-    charge           = mdatoms->chargeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = fr->gbtab->scale;
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = (1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent);
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-        shX              = shiftvec[i_shift_offset+XX];
-        shY              = shiftvec[i_shift_offset+YY];
-        shZ              = shiftvec[i_shift_offset+ZZ];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        ix0              = shX + x[i_coord_offset+DIM*0+XX];
-        iy0              = shY + x[i_coord_offset+DIM*0+YY];
-        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
-
-        fix0             = 0.0;
-        fiy0             = 0.0;
-        fiz0             = 0.0;
-
-        /* Load parameters for i particles */
-        iq0              = facel*charge[inr+0];
-        isai0            = invsqrta[inr+0];
-
-        dvdasum          = 0.0;
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end; jidx++)
-        {
-            /* Get j neighbor index, and coordinate index */
-            jnr              = jjnr[jidx];
-            j_coord_offset   = DIM*jnr;
-
-            /* load j atom coordinates */
-            jx0              = x[j_coord_offset+DIM*0+XX];
-            jy0              = x[j_coord_offset+DIM*0+YY];
-            jz0              = x[j_coord_offset+DIM*0+ZZ];
-
-            /* Calculate displacement vector */
-            dx00             = ix0 - jx0;
-            dy00             = iy0 - jy0;
-            dz00             = iz0 - jz0;
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
-
-            rinv00           = 1.0/sqrt(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = charge[jnr+0];
-            isaj0           = invsqrta[jnr+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = rsq00*rinv00;
-
-            qq00             = iq0*jq0;
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = isai0*isaj0;
-            gbqqfactor       = isaprod*(-qq00)*gbinvepsdiff;
-            gbscale          = isaprod*gbtabscale;
-            dvdaj            = dvda[jnr+0];
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = r00*gbscale;
-            gbitab           = rt;
-            gbeps            = rt-gbitab;
-            gbitab           = 4*gbitab;
-
-            Y                = gbtab[gbitab];
-            F                = gbtab[gbitab+1];
-            Geps             = gbeps*gbtab[gbitab+2];
-            Heps2            = gbeps*gbeps*gbtab[gbitab+3];
-            Fp               = F+Geps+Heps2;
-            VV               = Y+gbeps*Fp;
-            vgb              = gbqqfactor*VV;
-
-            FF               = Fp+Geps+2.0*Heps2;
-            fgb              = gbqqfactor*FF*gbscale;
-            dvdatmp          = -0.5*(vgb+fgb*r00);
-            dvdasum          = dvdasum + dvdatmp;
-            dvda[jnr]        = dvdaj+dvdatmp*isaj0*isaj0;
-            velec            = qq00*rinv00;
-            felec            = (velec*rinv00-fgb)*rinv00;
-
-            fscal            = felec;
-
-            /* Calculate temporary vectorial force */
-            tx               = fscal*dx00;
-            ty               = fscal*dy00;
-            tz               = fscal*dz00;
-
-            /* Update vectorial force */
-            fix0            += tx;
-            fiy0            += ty;
-            fiz0            += tz;
-            f[j_coord_offset+DIM*0+XX] -= tx;
-            f[j_coord_offset+DIM*0+YY] -= ty;
-            f[j_coord_offset+DIM*0+ZZ] -= tz;
-
-            /* Inner loop uses 56 flops */
-        }
-        /* End of innermost loop */
-
-        tx = ty = tz = 0;
-        f[i_coord_offset+DIM*0+XX] += fix0;
-        f[i_coord_offset+DIM*0+YY] += fiy0;
-        f[i_coord_offset+DIM*0+ZZ] += fiz0;
-        tx                         += fix0;
-        ty                         += fiy0;
-        tz                         += fiz0;
-        fshift[i_shift_offset+XX]  += tx;
-        fshift[i_shift_offset+YY]  += ty;
-        fshift[i_shift_offset+ZZ]  += tz;
-
-        dvda[inr]                   = dvda[inr] + dvdasum*isai0*isai0;
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 13 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*13 + inneriter*56);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsallgb.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsallgb.cpp
deleted file mode 100644
index d666d13fd0..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsallgb.cpp
+++ /dev/null
@@ -1,527 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team.
- * Copyright (c) 2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#include "gmxpre.h"
-
-#include "nb_kernel_allvsallgb.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "gromacs/gmxlib/nrnb.h"
-#include "gromacs/utility/real.h"
-#include "gromacs/utility/smalloc.h"
-
-typedef struct
-{
-    real **    pvdwparam;
-    int *      jindex;
-    int **     exclusion_mask;
-}
-gmx_allvsall_data_t;
-
-static int
-calc_maxoffset(int i, int natoms)
-{
-    int maxoffset;
-
-    if ((natoms % 2) == 1)
-    {
-        /* Odd number of atoms, easy */
-        maxoffset = natoms/2;
-    }
-    else if ((natoms % 4) == 0)
-    {
-        /* Multiple of four is hard */
-        if (i < natoms/2)
-        {
-            if ((i % 2) == 0)
-            {
-                maxoffset = natoms/2;
-            }
-            else
-            {
-                maxoffset = natoms/2-1;
-            }
-        }
-        else
-        {
-            if ((i % 2) == 1)
-            {
-                maxoffset = natoms/2;
-            }
-            else
-            {
-                maxoffset = natoms/2-1;
-            }
-        }
-    }
-    else
-    {
-        /* natoms/2 = odd */
-        if ((i % 2) == 0)
-        {
-            maxoffset = natoms/2;
-        }
-        else
-        {
-            maxoffset = natoms/2-1;
-        }
-    }
-
-    return maxoffset;
-}
-
-
-static void
-setup_exclusions_and_indices(gmx_allvsall_data_t *   aadata,
-                             t_blocka *              excl,
-                             int                     natoms)
-{
-    int i, j, k;
-    int nj0, nj1;
-    int max_offset;
-    int max_excl_offset;
-    int iexcl;
-
-    /* This routine can appear to be a bit complex, but it is mostly book-keeping.
-     * To enable the fast all-vs-all kernel we need to be able to stream through all coordinates
-     * whether they should interact or not.
-     *
-     * To avoid looping over the exclusions, we create a simple mask that is 1 if the interaction
-     * should be present, otherwise 0. Since exclusions typically only occur when i & j are close,
-     * we create a jindex array with three elements per i atom: the starting point, the point to
-     * which we need to check exclusions, and the end point.
-     * This way we only have to allocate a short exclusion mask per i atom.
-     */
-
-    /* Allocate memory for our modified jindex array */
-    snew(aadata->jindex, 3*natoms);
-
-    /* Pointer to lists with exclusion masks */
-    snew(aadata->exclusion_mask, natoms);
-
-    for (i = 0; i < natoms; i++)
-    {
-        /* Start */
-        aadata->jindex[3*i]   = i+1;
-        max_offset            = calc_maxoffset(i, natoms);
-
-        /* Exclusions */
-        nj0   = excl->index[i];
-        nj1   = excl->index[i+1];
-
-        /* first check the max range */
-        max_excl_offset = -1;
-
-        for (j = nj0; j < nj1; j++)
-        {
-            iexcl = excl->a[j];
-
-            k = iexcl - i;
-
-            if (k+natoms <= max_offset)
-            {
-                k += natoms;
-            }
-
-            max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
-        }
-
-        max_excl_offset = (max_offset < max_excl_offset) ? max_offset : max_excl_offset;
-
-        aadata->jindex[3*i+1] = i+1+max_excl_offset;
-
-        snew(aadata->exclusion_mask[i], max_excl_offset);
-        /* Include everything by default */
-        for (j = 0; j < max_excl_offset; j++)
-        {
-            /* Use all-ones to mark interactions that should be present, compatible with SSE */
-            aadata->exclusion_mask[i][j] = 0xFFFFFFFF;
-        }
-
-        /* Go through exclusions again */
-        for (j = nj0; j < nj1; j++)
-        {
-            iexcl = excl->a[j];
-
-            k = iexcl - i;
-
-            if (k+natoms <= max_offset)
-            {
-                k += natoms;
-            }
-
-            if (k > 0 && k <= max_excl_offset)
-            {
-                /* Excluded, kill it! */
-                aadata->exclusion_mask[i][k-1] = 0;
-            }
-        }
-
-        /* End */
-        aadata->jindex[3*i+2] = i+1+max_offset;
-    }
-}
-
-
-static void
-setup_aadata(gmx_allvsall_data_t **  p_aadata,
-             t_blocka *              excl,
-             int                     natoms,
-             int *                   type,
-             int                     ntype,
-             real *                  pvdwparam)
-{
-    int                  i, j, idx;
-    gmx_allvsall_data_t *aadata;
-    real                *p;
-
-    snew(aadata, 1);
-    *p_aadata = aadata;
-
-    /* Generate vdw params */
-    snew(aadata->pvdwparam, ntype);
-
-    for (i = 0; i < ntype; i++)
-    {
-        snew(aadata->pvdwparam[i], 2*natoms);
-        p = aadata->pvdwparam[i];
-
-        /* Lets keep it simple and use multiple steps - first create temp. c6/c12 arrays */
-        for (j = 0; j < natoms; j++)
-        {
-            idx             = i*ntype+type[j];
-            p[2*j]          = pvdwparam[2*idx];
-            p[2*j+1]        = pvdwparam[2*idx+1];
-        }
-    }
-
-    setup_exclusions_and_indices(aadata, excl, natoms);
-}
-
-
-
-void
-nb_kernel_allvsallgb(t_nblist gmx_unused *     nlist,
-                     rvec *                    xx,
-                     rvec *                    ff,
-                     struct t_forcerec *       fr,
-                     t_mdatoms *               mdatoms,
-                     nb_kernel_data_t *        kernel_data,
-                     t_nrnb *                  nrnb)
-{
-    gmx_allvsall_data_t *aadata;
-    int                  natoms;
-    int                  ni0, ni1;
-    int                  nj0, nj1, nj2;
-    int                  i, j, k;
-    real           *     charge;
-    int           *      type;
-    real                 facel;
-    real           *     pvdw;
-    int                  ggid;
-    int           *      mask;
-    real           *     GBtab;
-    real                 gbfactor;
-    real           *     invsqrta;
-    real           *     dvda;
-    real                 vgbtot, dvdasum;
-    int                  nnn, n0;
-
-    real                 ix, iy, iz, iq;
-    real                 fix, fiy, fiz;
-    real                 jx, jy, jz, qq;
-    real                 dx, dy, dz;
-    real                 tx, ty, tz;
-    real                 rsq, rinv, rinvsq, rinvsix;
-    real                 vcoul, vctot;
-    real                 c6, c12, Vvdw6, Vvdw12, Vvdwtot;
-    real                 fscal, dvdatmp, fijC, vgb;
-    real                 Y, F, Fp, Geps, Heps2, VV, FF, eps, eps2, r, rt;
-    real                 dvdaj, gbscale, isaprod, isai, isaj, gbtabscale;
-    real           *     f;
-    real           *     x;
-    t_blocka           * excl;
-    real           *     Vvdw;
-    real           *     Vc;
-    real           *     vpol;
-
-    x                   = xx[0];
-    f                   = ff[0];
-    charge              = mdatoms->chargeA;
-    type                = mdatoms->typeA;
-    gbfactor            = ((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-    facel               = fr->ic->epsfac;
-    GBtab               = fr->gbtab->data;
-    gbtabscale          = fr->gbtab->scale;
-    invsqrta            = fr->invsqrta;
-    dvda                = fr->dvda;
-    vpol                = kernel_data->energygrp_polarization;
-
-    natoms              = mdatoms->nr;
-    ni0                 = 0;
-    ni1                 = mdatoms->homenr;
-
-    aadata              = reinterpret_cast<gmx_allvsall_data_t *>(fr->AllvsAll_work);
-    excl                = kernel_data->exclusions;
-
-    Vc                  = kernel_data->energygrp_elec;
-    Vvdw                = kernel_data->energygrp_vdw;
-
-    if (aadata == NULL)
-    {
-        setup_aadata(&aadata, excl, natoms, type, fr->ntype, fr->nbfp);
-        fr->AllvsAll_work  = aadata;
-    }
-
-    for (i = ni0; i < ni1; i++)
-    {
-        /* We assume shifts are NOT used for all-vs-all interactions */
-
-        /* Load i atom data */
-        ix                = x[3*i];
-        iy                = x[3*i+1];
-        iz                = x[3*i+2];
-        iq                = facel*charge[i];
-
-        isai              = invsqrta[i];
-
-        pvdw              = aadata->pvdwparam[type[i]];
-
-        /* Zero the potential energy for this list */
-        Vvdwtot           = 0.0;
-        vctot             = 0.0;
-        vgbtot            = 0.0;
-        dvdasum           = 0.0;
-
-        /* Clear i atom forces */
-        fix               = 0.0;
-        fiy               = 0.0;
-        fiz               = 0.0;
-
-        /* Load limits for loop over neighbors */
-        nj0              = aadata->jindex[3*i];
-        nj1              = aadata->jindex[3*i+1];
-        nj2              = aadata->jindex[3*i+2];
-
-        mask             = aadata->exclusion_mask[i];
-
-        /* Prologue part, including exclusion mask */
-        for (j = nj0; j < nj1; j++, mask++)
-        {
-            if (*mask != 0)
-            {
-                k = j%natoms;
-
-                /* load j atom coordinates */
-                jx                = x[3*k];
-                jy                = x[3*k+1];
-                jz                = x[3*k+2];
-
-                /* Calculate distance */
-                dx                = ix - jx;
-                dy                = iy - jy;
-                dz                = iz - jz;
-                rsq               = dx*dx+dy*dy+dz*dz;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv             = 1.0/sqrt(rsq);
-
-                /* Load parameters for j atom */
-                isaj              = invsqrta[k];
-                isaprod           = isai*isaj;
-                qq                = iq*charge[k];
-                vcoul             = qq*rinv;
-                fscal             = vcoul*rinv;
-                qq                = isaprod*(-qq)*gbfactor;
-                gbscale           = isaprod*gbtabscale;
-                c6                = pvdw[2*k];
-                c12               = pvdw[2*k+1];
-                rinvsq            = rinv*rinv;
-
-                /* Tabulated Generalized-Born interaction */
-                dvdaj            = dvda[k];
-                r                = rsq*rinv;
-
-                /* Calculate table index */
-                rt               = r*gbscale;
-                n0               = rt;
-                eps              = rt-n0;
-                eps2             = eps*eps;
-                nnn              = 4*n0;
-                Y                = GBtab[nnn];
-                F                = GBtab[nnn+1];
-                Geps             = eps*GBtab[nnn+2];
-                Heps2            = eps2*GBtab[nnn+3];
-                Fp               = F+Geps+Heps2;
-                VV               = Y+eps*Fp;
-                FF               = Fp+Geps+2.0*Heps2;
-                vgb              = qq*VV;
-                fijC             = qq*FF*gbscale;
-                dvdatmp          = -0.5*(vgb+fijC*r);
-                dvdasum          = dvdasum + dvdatmp;
-                dvda[k]          = dvdaj+dvdatmp*isaj*isaj;
-                vctot            = vctot + vcoul;
-                vgbtot           = vgbtot + vgb;
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq-(fijC-fscal)*rinv;
-
-                /* Calculate temporary vectorial force */
-                tx                = fscal*dx;
-                ty                = fscal*dy;
-                tz                = fscal*dz;
-
-                /* Increment i atom force */
-                fix               = fix + tx;
-                fiy               = fiy + ty;
-                fiz               = fiz + tz;
-
-                /* Decrement j atom force */
-                f[3*k]            = f[3*k]   - tx;
-                f[3*k+1]          = f[3*k+1] - ty;
-                f[3*k+2]          = f[3*k+2] - tz;
-            }
-            /* Inner loop uses 38 flops/iteration */
-        }
-
-        /* Main part, no exclusions */
-        for (j = nj1; j < nj2; j++)
-        {
-            k = j%natoms;
-
-            /* load j atom coordinates */
-            jx                = x[3*k];
-            jy                = x[3*k+1];
-            jz                = x[3*k+2];
-
-            /* Calculate distance */
-            dx                = ix - jx;
-            dy                = iy - jy;
-            dz                = iz - jz;
-            rsq               = dx*dx+dy*dy+dz*dz;
-
-            /* Calculate 1/r and 1/r2 */
-            rinv             = 1.0/sqrt(rsq);
-
-            /* Load parameters for j atom */
-            isaj              = invsqrta[k];
-            isaprod           = isai*isaj;
-            qq                = iq*charge[k];
-            vcoul             = qq*rinv;
-            fscal             = vcoul*rinv;
-            qq                = isaprod*(-qq)*gbfactor;
-            gbscale           = isaprod*gbtabscale;
-            c6                = pvdw[2*k];
-            c12               = pvdw[2*k+1];
-            rinvsq            = rinv*rinv;
-
-            /* Tabulated Generalized-Born interaction */
-            dvdaj            = dvda[k];
-            r                = rsq*rinv;
-
-            /* Calculate table index */
-            rt               = r*gbscale;
-            n0               = rt;
-            eps              = rt-n0;
-            eps2             = eps*eps;
-            nnn              = 4*n0;
-            Y                = GBtab[nnn];
-            F                = GBtab[nnn+1];
-            Geps             = eps*GBtab[nnn+2];
-            Heps2            = eps2*GBtab[nnn+3];
-            Fp               = F+Geps+Heps2;
-            VV               = Y+eps*Fp;
-            FF               = Fp+Geps+2.0*Heps2;
-            vgb              = qq*VV;
-            fijC             = qq*FF*gbscale;
-            dvdatmp          = -0.5*(vgb+fijC*r);
-            dvdasum          = dvdasum + dvdatmp;
-            dvda[k]          = dvdaj+dvdatmp*isaj*isaj;
-            vctot            = vctot + vcoul;
-            vgbtot           = vgbtot + vgb;
-
-            /* Lennard-Jones interaction */
-            rinvsix          = rinvsq*rinvsq*rinvsq;
-            Vvdw6            = c6*rinvsix;
-            Vvdw12           = c12*rinvsix*rinvsix;
-            Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-            fscal            = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq-(fijC-fscal)*rinv;
-
-            /* Calculate temporary vectorial force */
-            tx                = fscal*dx;
-            ty                = fscal*dy;
-            tz                = fscal*dz;
-
-            /* Increment i atom force */
-            fix               = fix + tx;
-            fiy               = fiy + ty;
-            fiz               = fiz + tz;
-
-            /* Decrement j atom force */
-            f[3*k]            = f[3*k]   - tx;
-            f[3*k+1]          = f[3*k+1] - ty;
-            f[3*k+2]          = f[3*k+2] - tz;
-
-            /* Inner loop uses 38 flops/iteration */
-        }
-
-        f[3*i]   += fix;
-        f[3*i+1] += fiy;
-        f[3*i+2] += fiz;
-
-        /* Add potential energies to the group for this list */
-        ggid             = 0;
-
-        Vc[ggid]         = Vc[ggid] + vctot;
-        Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-        vpol[ggid]       = vpol[ggid] + vgbtot;
-        dvda[i]          = dvda[i] + dvdasum*isai*isai;
-
-        /* Outer loop uses 6 flops/iteration */
-    }
-
-    /* 12 flops per outer iteration
-     * 19 flops per inner iteration
-     */
-    inc_nrnb(nrnb, eNR_NBKERNEL_ELEC_VDW_VF, (ni1-ni0)*12 + ((ni1-ni0)*natoms/2)*19);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsallgb.h b/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsallgb.h
deleted file mode 100644
index 943af5c486..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsallgb.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#ifndef _NB_KERNEL_ALLVSALLGB_H
-#define _NB_KERNEL_ALLVSALLGB_H
-
-#include "config.h"
-
-#include "gromacs/gmxlib/nrnb.h"
-#include "gromacs/gmxlib/nonbonded/nb_kernel.h"
-#include "gromacs/math/vectypes.h"
-#include "gromacs/mdtypes/mdatom.h"
-#include "gromacs/mdtypes/nblist.h"
-
-void
-nb_kernel_allvsallgb(t_nblist *                nlist,
-                     rvec *                    x,
-                     rvec *                    f,
-                     struct t_forcerec *       fr,
-                     t_mdatoms *               mdatoms,
-                     nb_kernel_data_t *        kernel_data,
-                     t_nrnb *                  nrnb);
-
-#endif
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_c.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_c.c
index 11c173e2a9..250ee29c48 100644
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_c.c
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_c.c
@@ -57,6 +57,46 @@ nb_kernel_t nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_VF_c;
 nb_kernel_t nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_F_c;
 nb_kernel_t nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_VF_c;
 nb_kernel_t nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW4W4_F_c;
 nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_c;
 nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_c;
 nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_c;
@@ -177,94 +217,6 @@ nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_VF_c;
 nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_F_c;
 nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_VF_c;
 nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_F_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomP1P1_VF_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomP1P1_F_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW3P1_VF_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW3P1_F_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW3W3_VF_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW3W3_F_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW4P1_VF_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW4P1_F_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW4W4_VF_c;
-nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW4W4_F_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomP1P1_VF_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomP1P1_F_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW3P1_VF_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW3P1_F_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW3W3_VF_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW3W3_F_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW4P1_VF_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW4P1_F_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW4W4_VF_c;
-nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW4W4_F_c;
-nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_c;
-nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_c;
-nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_c;
-nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_F_c;
-nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_c;
-nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_c;
-nb_kernel_t nb_kernel_ElecGB_VdwBham_GeomP1P1_VF_c;
-nb_kernel_t nb_kernel_ElecGB_VdwBham_GeomP1P1_F_c;
 nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_c;
 nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_c;
 nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_c;
@@ -365,6 +317,46 @@ nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomW4P1_VF_c;
 nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomW4P1_F_c;
 nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomW4W4_VF_c;
 nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW4W4_F_c;
 
 
 nb_kernel_info_t
@@ -388,6 +380,46 @@ nb_kernel_info_t
     { nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_F_c, "nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_F_c", "c", "None", "None", "Buckingham", "PotentialShift", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_VF_c, "nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_VF_c", "c", "None", "None", "Buckingham", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
     { nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_F_c, "nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_F_c", "c", "None", "None", "Buckingham", "PotentialSwitch", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_c", "c", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_c", "c", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_c", "c", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_c", "c", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_c", "c", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_c", "c", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_c", "c", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_c", "c", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_c", "c", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_c", "c", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwBham_GeomP1P1_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomP1P1_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwBham_GeomP1P1_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomP1P1_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwBham_GeomW3P1_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomW3P1_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwBham_GeomW3P1_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomW3P1_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwBham_GeomW3W3_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomW3W3_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwBham_GeomW3W3_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomW3W3_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwBham_GeomW4P1_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomW4P1_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwBham_GeomW4P1_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomW4P1_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwBham_GeomW4W4_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomW4W4_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwBham_GeomW4W4_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomW4W4_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water4Water4", "", "Force" },
     { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_c, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_c", "c", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "PotentialAndForce" },
     { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_c, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_c", "c", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_c, "nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_c", "c", "Ewald", "None", "LJEwald", "None", "Water3Particle", "", "PotentialAndForce" },
@@ -508,94 +540,6 @@ nb_kernel_info_t
     { nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_F_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_F_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "Water4Particle", "", "Force" },
     { nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_VF_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_VF_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
     { nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_F_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_F_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_c", "c", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_c", "c", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_c", "c", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_c", "c", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_c", "c", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_c", "c", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_c", "c", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_c", "c", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_c, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_c", "c", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_c", "c", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_c, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_c", "c", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_c", "c", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_c, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_c", "c", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_c", "c", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_c, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_c", "c", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwBham_GeomP1P1_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomP1P1_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwBham_GeomP1P1_F_c, "nb_kernel_ElecCoul_VdwBham_GeomP1P1_F_c", "c", "Coulomb", "None", "Buckingham", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwBham_GeomW3P1_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomW3P1_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwBham_GeomW3P1_F_c, "nb_kernel_ElecCoul_VdwBham_GeomW3P1_F_c", "c", "Coulomb", "None", "Buckingham", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwBham_GeomW3W3_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomW3W3_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwBham_GeomW3W3_F_c, "nb_kernel_ElecCoul_VdwBham_GeomW3W3_F_c", "c", "Coulomb", "None", "Buckingham", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwBham_GeomW4P1_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomW4P1_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwBham_GeomW4P1_F_c, "nb_kernel_ElecCoul_VdwBham_GeomW4P1_F_c", "c", "Coulomb", "None", "Buckingham", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwBham_GeomW4W4_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomW4W4_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwBham_GeomW4W4_F_c, "nb_kernel_ElecCoul_VdwBham_GeomW4W4_F_c", "c", "Coulomb", "None", "Buckingham", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_c", "c", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_c", "c", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_c", "c", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_c", "c", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_c", "c", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_c", "c", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_c", "c", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_c", "c", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_c", "c", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_c", "c", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwBham_GeomP1P1_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomP1P1_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwBham_GeomP1P1_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomP1P1_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwBham_GeomW3P1_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomW3P1_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwBham_GeomW3P1_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomW3P1_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwBham_GeomW3W3_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomW3W3_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwBham_GeomW3W3_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomW3W3_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwBham_GeomW4P1_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomW4P1_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwBham_GeomW4P1_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomW4P1_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwBham_GeomW4W4_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomW4W4_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwBham_GeomW4W4_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomW4W4_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_c, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_c", "c", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_c, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_c", "c", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_c", "c", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecGB_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecGB_VdwNone_GeomP1P1_F_c", "c", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_c, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_c", "c", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_c, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_c", "c", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecGB_VdwBham_GeomP1P1_VF_c, "nb_kernel_ElecGB_VdwBham_GeomP1P1_VF_c", "c", "GeneralizedBorn", "None", "Buckingham", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecGB_VdwBham_GeomP1P1_F_c, "nb_kernel_ElecGB_VdwBham_GeomP1P1_F_c", "c", "GeneralizedBorn", "None", "Buckingham", "None", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
     { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
@@ -695,7 +639,47 @@ nb_kernel_info_t
     { nb_kernel_ElecRF_VdwBham_GeomW4P1_VF_c, "nb_kernel_ElecRF_VdwBham_GeomW4P1_VF_c", "c", "ReactionField", "None", "Buckingham", "None", "Water4Particle", "", "PotentialAndForce" },
     { nb_kernel_ElecRF_VdwBham_GeomW4P1_F_c, "nb_kernel_ElecRF_VdwBham_GeomW4P1_F_c", "c", "ReactionField", "None", "Buckingham", "None", "Water4Particle", "", "Force" },
     { nb_kernel_ElecRF_VdwBham_GeomW4W4_VF_c, "nb_kernel_ElecRF_VdwBham_GeomW4W4_VF_c", "c", "ReactionField", "None", "Buckingham", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecRF_VdwBham_GeomW4W4_F_c, "nb_kernel_ElecRF_VdwBham_GeomW4W4_F_c", "c", "ReactionField", "None", "Buckingham", "None", "Water4Water4", "", "Force" }
+    { nb_kernel_ElecRF_VdwBham_GeomW4W4_F_c, "nb_kernel_ElecRF_VdwBham_GeomW4W4_F_c", "c", "ReactionField", "None", "Buckingham", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_c", "c", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_c", "c", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_c", "c", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_c", "c", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_c", "c", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_c", "c", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_c", "c", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_c", "c", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_c, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_c", "c", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_c", "c", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_c, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_c", "c", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_c", "c", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_c, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_c", "c", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_c", "c", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_c, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_c", "c", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwBham_GeomP1P1_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomP1P1_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwBham_GeomP1P1_F_c, "nb_kernel_ElecCoul_VdwBham_GeomP1P1_F_c", "c", "Coulomb", "None", "Buckingham", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwBham_GeomW3P1_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomW3P1_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwBham_GeomW3P1_F_c, "nb_kernel_ElecCoul_VdwBham_GeomW3P1_F_c", "c", "Coulomb", "None", "Buckingham", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwBham_GeomW3W3_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomW3W3_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwBham_GeomW3W3_F_c, "nb_kernel_ElecCoul_VdwBham_GeomW3W3_F_c", "c", "Coulomb", "None", "Buckingham", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwBham_GeomW4P1_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomW4P1_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwBham_GeomW4P1_F_c, "nb_kernel_ElecCoul_VdwBham_GeomW4P1_F_c", "c", "Coulomb", "None", "Buckingham", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwBham_GeomW4W4_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomW4W4_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwBham_GeomW4W4_F_c, "nb_kernel_ElecCoul_VdwBham_GeomW4W4_F_c", "c", "Coulomb", "None", "Buckingham", "None", "Water4Water4", "", "Force" }
 };
 
 int
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_template_c.pre b/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_template_c.pre
index 20cf4af2ef..27daeab020 100644
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_template_c.pre
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_template_c.pre
@@ -2,7 +2,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -143,18 +143,13 @@ void
     real             velec,felec,velecsum,facel,crf,krf,krf2;
     real             *charge;
     /* #endif */
-    /* #if 'GeneralizedBorn' in KERNEL_ELEC */
-    int              gbitab;
-    real             vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
-    real             *invsqrta,*dvda,*gbtab;
-    /* #endif */
     /* #if KERNEL_VDW != 'None' */
     int              nvdwtype;
     real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
     int              *vdwtype;
     real             *vdwparam;
     /* #endif */
-    /* #if 'Table' in KERNEL_ELEC or 'GeneralizedBorn' in KERNEL_ELEC or 'Table' in KERNEL_VDW */
+    /* #if 'Table' in KERNEL_ELEC or 'Table' in KERNEL_VDW */
     int              vfitab;
     real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
     real             *vftab;
@@ -232,14 +227,6 @@ void
      /*     #endif */
     /* #endif */
 
-    /* #if KERNEL_ELEC=='GeneralizedBorn' */
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = fr->gbtab->scale;
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = (1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent);
-    /* #endif */
-
     /* #if 'Water' in GEOMETRY_I */
     /* Setup water-specific parameters */
     inr              = nlist->iinr[0];
@@ -358,9 +345,6 @@ void
         /*     #for I in PARTICLES_ELEC_I */
         iq{I}              = facel*charge[inr+{I}];
         /*         #define OUTERFLOPS OUTERFLOPS+1 */
-        /*         #if KERNEL_ELEC=='GeneralizedBorn' */
-        isai{I}            = invsqrta[inr+{I}];
-        /*         #endif */
         /*     #endfor */
         /*     #for I in PARTICLES_VDW_I */
         vdwioffset{I}      = {NVDWPARAM}*nvdwtype*vdwtype[inr+{I}];
@@ -372,16 +356,10 @@ void
         /*     #if KERNEL_ELEC != 'None' */
         velecsum         = 0.0;
         /*     #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC */
-        vgbsum           = 0.0;
-        /*     #endif */
         /*     #if KERNEL_VDW != 'None' */
         vvdwsum          = 0.0;
         /*     #endif */
         /* #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */
-        dvdasum          = 0.0;
-        /*     #endif */
 
         /* Start inner kernel loop */
         for(jidx=j_index_start; jidx<j_index_end; jidx++)
@@ -434,9 +412,6 @@ void
             /* Load parameters for j particles */
             /*     #for J in PARTICLES_ELEC_J */
             jq{J}              = charge[jnr+{J}];
-            /*         #if KERNEL_ELEC=='GeneralizedBorn' */
-            isaj{J}           = invsqrta[jnr+{J}];
-            /*         #endif */
             /*     #endfor */
             /*     #for J in PARTICLES_VDW_J */
             vdwjidx{J}         = {NVDWPARAM}*vdwtype[jnr+{J}];
@@ -520,47 +495,6 @@ void
             /*                 #define INNERFLOPS INNERFLOPS+3 */
             /*             #endif */
 
-            /*         #elif KERNEL_ELEC=='GeneralizedBorn' */
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = isai{I}*isaj{J};
-            gbqqfactor       = isaprod*(-qq{I}{J})*gbinvepsdiff;
-            gbscale          = isaprod*gbtabscale;
-            dvdaj            = dvda[jnr+{J}];
-            /*             #define INNERFLOPS INNERFLOPS+5 */
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = r{I}{J}*gbscale;
-            gbitab           = rt;
-            gbeps            = rt-gbitab;
-            gbitab           = 4*gbitab;
-
-            Y                = gbtab[gbitab];
-            F                = gbtab[gbitab+1];
-            Geps             = gbeps*gbtab[gbitab+2];
-            Heps2            = gbeps*gbeps*gbtab[gbitab+3];
-            Fp               = F+Geps+Heps2;
-            VV               = Y+gbeps*Fp;
-            vgb              = gbqqfactor*VV;
-            /*             #define INNERFLOPS INNERFLOPS+10 */
-
-            /*             #if 'Force' in KERNEL_VF */
-            FF               = Fp+Geps+2.0*Heps2;
-            fgb              = gbqqfactor*FF*gbscale;
-            dvdatmp          = -0.5*(vgb+fgb*r{I}{J});
-            dvdasum          = dvdasum + dvdatmp;
-            dvda[jnr]        = dvdaj+dvdatmp*isaj{J}*isaj{J};
-            /*                 #define INNERFLOPS INNERFLOPS+13 */
-            /*             #endif */
-            velec            = qq{I}{J}*rinv{I}{J};
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #if 'Force' in KERNEL_VF */
-            felec            = (velec*rinv{I}{J}-fgb)*rinv{I}{J};
-            /*                 #define INNERFLOPS INNERFLOPS+3 */
-            /*             #endif */
-
             /*         #elif KERNEL_ELEC=='Ewald' */
             /* EWALD ELECTROSTATICS */
 
@@ -794,10 +728,6 @@ void
             /*         #if 'electrostatics' in INTERACTION_FLAGS[I][J] */
             velecsum        += velec;
             /*             #define INNERFLOPS INNERFLOPS+1 */
-            /*             #if KERNEL_ELEC=='GeneralizedBorn' */
-            vgbsum          += vgb;
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #endif */
             /*         #endif */
             /*         #if 'vdw' in INTERACTION_FLAGS[I][J] */
             vvdwsum         += vvdw;
@@ -872,18 +802,11 @@ void
         kernel_data->energygrp_elec[ggid] += velecsum;
         /*         #define OUTERFLOPS OUTERFLOPS+1 */
         /*     #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC */
-        kernel_data->energygrp_polarization[ggid] += vgbsum;
-        /*         #define OUTERFLOPS OUTERFLOPS+1 */
-        /*     #endif */
         /*     #if KERNEL_VDW != 'None' */
         kernel_data->energygrp_vdw[ggid] += vvdwsum;
         /*         #define OUTERFLOPS OUTERFLOPS+1 */
         /*     #endif */
         /* #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */
-        dvda[inr]                   = dvda[inr] + dvdasum*isai{I}*isai{I};
-        /*     #endif */
 
         /* Increment number of inner iterations */
         inneriter                  += j_index_end - j_index_start;
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/make_nb_kernel_sparc64_hpc_ace_double.py b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/make_nb_kernel_sparc64_hpc_ace_double.py
index 6352ce3c0d..582044c2b7 100755
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/make_nb_kernel_sparc64_hpc_ace_double.py
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/make_nb_kernel_sparc64_hpc_ace_double.py
@@ -2,7 +2,7 @@
 #
 # This file is part of the GROMACS molecular simulation package.
 #
-# Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
+# Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 # and including many others, as listed in the AUTHORS file in the
 # top-level source directory and at http://www.gromacs.org.
@@ -105,7 +105,6 @@ ElectrostaticsList = {
     'None'                    : [],
     'Coulomb'                 : ['rinv','rinvsq'],
     'ReactionField'           : ['rinv','rinvsq'],
-    'GeneralizedBorn'         : ['rinv','r'],
     'CubicSplineTable'        : ['rinv','r','table'],
     'Ewald'                   : ['rinv','rinvsq','r'],
 }
@@ -190,7 +189,6 @@ Abbreviation = {
     'Coulomb'                 : 'Coul',
     'Ewald'                   : 'Ew',
     'ReactionField'           : 'RF',
-    'GeneralizedBorn'         : 'GB',
     'CubicSplineTable'        : 'CSTab',
     'LennardJones'            : 'LJ',
     'Buckingham'              : 'Bham',
@@ -269,7 +267,7 @@ def KeepKernel(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,Kernel
         return 0
 
-    # No need for LJ-only water optimization, or water optimization with implicit solvent.
-    if('Water' in KernelGeom[0] and (KernelElec=='None' or 'GeneralizedBorn' in KernelElec)):
+    # No need for LJ-only water optimization.
+    if('Water' in KernelGeom[0] and KernelElec=='None'):
         return 0
 
     # Non-matching table settings are pointless
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.c
deleted file mode 100644
index 4b3e5b68f0..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.c
+++ /dev/null
@@ -1,820 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp;
-    _fjsp_v2r8       minushalf = gmx_fjsp_set1_v2r8(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = gmx_fjsp_set1_v2r8(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = gmx_fjsp_set1_v2r8((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        isai0            = gmx_fjsp_load1_v2r8(invsqrta+inr+0);
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vgbsum           = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-        dvdasum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            isaj0            = gmx_fjsp_load_2real_swizzle_v2r8(invsqrta+jnrA+0,invsqrta+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _fjsp_mul_v2r8(isai0,isaj0);
-            gbqqfactor       = _fjsp_neg_v2r8(_fjsp_mul_v2r8(qq00,_fjsp_mul_v2r8(isaprod,gbinvepsdiff)));
-            gbscale          = _fjsp_mul_v2r8(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _fjsp_mul_v2r8(r00,gbscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            gbeps            = _fjsp_sub_v2r8(rt,_fjsp_xtod_v2r8(itab_tmp));
-            _fjsp_store_v2r8(&gbconv.simd,itab_tmp);
-
-            Y                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[0] );
-            F                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[0] +2);
-            H                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(gbeps,_fjsp_madd_v2r8(gbeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(gbeps,Fp,Y);
-            vgb              = _fjsp_mul_v2r8(gbqqfactor,VV);
-
-            twogbeps         = _fjsp_add_v2r8(gbeps,gbeps);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twogbeps,H,G),gbeps,Fp);
-            fgb              = _fjsp_mul_v2r8(gbqqfactor,_fjsp_mul_v2r8(FF,gbscale));
-            dvdatmp          = _fjsp_mul_v2r8(minushalf,_fjsp_madd_v2r8(fgb,r00,vgb));
-            dvdasum          = _fjsp_add_v2r8(dvdasum,dvdatmp);
-            gmx_fjsp_increment_2real_swizzle_v2r8(dvda+jnrA,dvda+jnrB,_fjsp_mul_v2r8(dvdatmp,_fjsp_mul_v2r8(isaj0,isaj0)));
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(_fjsp_msub_v2r8(velec,rinv00,fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vgbsum           = _fjsp_add_v2r8(vgbsum,vgb);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 95 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            isaj0            = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),invsqrta+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _fjsp_mul_v2r8(isai0,isaj0);
-            gbqqfactor       = _fjsp_neg_v2r8(_fjsp_mul_v2r8(qq00,_fjsp_mul_v2r8(isaprod,gbinvepsdiff)));
-            gbscale          = _fjsp_mul_v2r8(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _fjsp_mul_v2r8(r00,gbscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            gbeps            = _fjsp_sub_v2r8(rt,_fjsp_xtod_v2r8(itab_tmp));
-            _fjsp_store_v2r8(&gbconv.simd,itab_tmp);
-
-            Y                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(gbeps,_fjsp_madd_v2r8(gbeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(gbeps,Fp,Y);
-            vgb              = _fjsp_mul_v2r8(gbqqfactor,VV);
-
-            twogbeps         = _fjsp_add_v2r8(gbeps,gbeps);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twogbeps,H,G),gbeps,Fp);
-            fgb              = _fjsp_mul_v2r8(gbqqfactor,_fjsp_mul_v2r8(FF,gbscale));
-            dvdatmp          = _fjsp_mul_v2r8(minushalf,_fjsp_madd_v2r8(fgb,r00,vgb));
-            dvdasum          = _fjsp_add_v2r8(dvdasum,dvdatmp);
-            gmx_fjsp_increment_1real_v2r8(dvda+jnrA,_fjsp_mul_v2r8(dvdatmp,_fjsp_mul_v2r8(isaj0,isaj0)));
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(_fjsp_msub_v2r8(velec,rinv00,fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vgb              = _fjsp_unpacklo_v2r8(vgb,_fjsp_setzero_v2r8());
-            vgbsum           = _fjsp_add_v2r8(vgbsum,vgb);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 95 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vgbsum,kernel_data->energygrp_polarization+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-        dvdasum = _fjsp_mul_v2r8(dvdasum, _fjsp_mul_v2r8(isai0,isai0));
-        gmx_fjsp_update_1pot_v2r8(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 10 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*95);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp;
-    _fjsp_v2r8       minushalf = gmx_fjsp_set1_v2r8(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = gmx_fjsp_set1_v2r8(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = gmx_fjsp_set1_v2r8((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        isai0            = gmx_fjsp_load1_v2r8(invsqrta+inr+0);
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        dvdasum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            isaj0            = gmx_fjsp_load_2real_swizzle_v2r8(invsqrta+jnrA+0,invsqrta+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _fjsp_mul_v2r8(isai0,isaj0);
-            gbqqfactor       = _fjsp_neg_v2r8(_fjsp_mul_v2r8(qq00,_fjsp_mul_v2r8(isaprod,gbinvepsdiff)));
-            gbscale          = _fjsp_mul_v2r8(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _fjsp_mul_v2r8(r00,gbscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            gbeps            = _fjsp_sub_v2r8(rt,_fjsp_xtod_v2r8(itab_tmp));
-            _fjsp_store_v2r8(&gbconv.simd,itab_tmp);
-
-            Y                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[0] );
-            F                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[0] +2);
-            H                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(gbeps,_fjsp_madd_v2r8(gbeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(gbeps,Fp,Y);
-            vgb              = _fjsp_mul_v2r8(gbqqfactor,VV);
-
-            twogbeps         = _fjsp_add_v2r8(gbeps,gbeps);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twogbeps,H,G),gbeps,Fp);
-            fgb              = _fjsp_mul_v2r8(gbqqfactor,_fjsp_mul_v2r8(FF,gbscale));
-            dvdatmp          = _fjsp_mul_v2r8(minushalf,_fjsp_madd_v2r8(fgb,r00,vgb));
-            dvdasum          = _fjsp_add_v2r8(dvdasum,dvdatmp);
-            gmx_fjsp_increment_2real_swizzle_v2r8(dvda+jnrA,dvda+jnrB,_fjsp_mul_v2r8(dvdatmp,_fjsp_mul_v2r8(isaj0,isaj0)));
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(_fjsp_msub_v2r8(velec,rinv00,fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 85 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            isaj0            = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),invsqrta+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _fjsp_mul_v2r8(isai0,isaj0);
-            gbqqfactor       = _fjsp_neg_v2r8(_fjsp_mul_v2r8(qq00,_fjsp_mul_v2r8(isaprod,gbinvepsdiff)));
-            gbscale          = _fjsp_mul_v2r8(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _fjsp_mul_v2r8(r00,gbscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            gbeps            = _fjsp_sub_v2r8(rt,_fjsp_xtod_v2r8(itab_tmp));
-            _fjsp_store_v2r8(&gbconv.simd,itab_tmp);
-
-            Y                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(gbeps,_fjsp_madd_v2r8(gbeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(gbeps,Fp,Y);
-            vgb              = _fjsp_mul_v2r8(gbqqfactor,VV);
-
-            twogbeps         = _fjsp_add_v2r8(gbeps,gbeps);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twogbeps,H,G),gbeps,Fp);
-            fgb              = _fjsp_mul_v2r8(gbqqfactor,_fjsp_mul_v2r8(FF,gbscale));
-            dvdatmp          = _fjsp_mul_v2r8(minushalf,_fjsp_madd_v2r8(fgb,r00,vgb));
-            dvdasum          = _fjsp_add_v2r8(dvdasum,dvdatmp);
-            gmx_fjsp_increment_1real_v2r8(dvda+jnrA,_fjsp_mul_v2r8(dvdatmp,_fjsp_mul_v2r8(isaj0,isaj0)));
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(_fjsp_msub_v2r8(velec,rinv00,fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 85 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        dvdasum = _fjsp_mul_v2r8(dvdasum, _fjsp_mul_v2r8(isai0,isai0));
-        gmx_fjsp_update_1pot_v2r8(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*85);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.c
deleted file mode 100644
index ccb3de73c1..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.c
+++ /dev/null
@@ -1,706 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp;
-    _fjsp_v2r8       minushalf = gmx_fjsp_set1_v2r8(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = gmx_fjsp_set1_v2r8(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = gmx_fjsp_set1_v2r8((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        isai0            = gmx_fjsp_load1_v2r8(invsqrta+inr+0);
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vgbsum           = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-        dvdasum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            isaj0            = gmx_fjsp_load_2real_swizzle_v2r8(invsqrta+jnrA+0,invsqrta+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _fjsp_mul_v2r8(isai0,isaj0);
-            gbqqfactor       = _fjsp_neg_v2r8(_fjsp_mul_v2r8(qq00,_fjsp_mul_v2r8(isaprod,gbinvepsdiff)));
-            gbscale          = _fjsp_mul_v2r8(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _fjsp_mul_v2r8(r00,gbscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            gbeps            = _fjsp_sub_v2r8(rt,_fjsp_xtod_v2r8(itab_tmp));
-            _fjsp_store_v2r8(&gbconv.simd,itab_tmp);
-
-            Y                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[0] );
-            F                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[0] +2);
-            H                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(gbeps,_fjsp_madd_v2r8(gbeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(gbeps,Fp,Y);
-            vgb              = _fjsp_mul_v2r8(gbqqfactor,VV);
-
-            twogbeps         = _fjsp_add_v2r8(gbeps,gbeps);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twogbeps,H,G),gbeps,Fp);
-            fgb              = _fjsp_mul_v2r8(gbqqfactor,_fjsp_mul_v2r8(FF,gbscale));
-            dvdatmp          = _fjsp_mul_v2r8(minushalf,_fjsp_madd_v2r8(fgb,r00,vgb));
-            dvdasum          = _fjsp_add_v2r8(dvdasum,dvdatmp);
-            gmx_fjsp_increment_2real_swizzle_v2r8(dvda+jnrA,dvda+jnrB,_fjsp_mul_v2r8(dvdatmp,_fjsp_mul_v2r8(isaj0,isaj0)));
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(_fjsp_msub_v2r8(velec,rinv00,fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vgbsum           = _fjsp_add_v2r8(vgbsum,vgb);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 74 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            isaj0            = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),invsqrta+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _fjsp_mul_v2r8(isai0,isaj0);
-            gbqqfactor       = _fjsp_neg_v2r8(_fjsp_mul_v2r8(qq00,_fjsp_mul_v2r8(isaprod,gbinvepsdiff)));
-            gbscale          = _fjsp_mul_v2r8(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _fjsp_mul_v2r8(r00,gbscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            gbeps            = _fjsp_sub_v2r8(rt,_fjsp_xtod_v2r8(itab_tmp));
-            _fjsp_store_v2r8(&gbconv.simd,itab_tmp);
-
-            Y                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(gbeps,_fjsp_madd_v2r8(gbeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(gbeps,Fp,Y);
-            vgb              = _fjsp_mul_v2r8(gbqqfactor,VV);
-
-            twogbeps         = _fjsp_add_v2r8(gbeps,gbeps);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twogbeps,H,G),gbeps,Fp);
-            fgb              = _fjsp_mul_v2r8(gbqqfactor,_fjsp_mul_v2r8(FF,gbscale));
-            dvdatmp          = _fjsp_mul_v2r8(minushalf,_fjsp_madd_v2r8(fgb,r00,vgb));
-            dvdasum          = _fjsp_add_v2r8(dvdasum,dvdatmp);
-            gmx_fjsp_increment_1real_v2r8(dvda+jnrA,_fjsp_mul_v2r8(dvdatmp,_fjsp_mul_v2r8(isaj0,isaj0)));
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(_fjsp_msub_v2r8(velec,rinv00,fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vgb              = _fjsp_unpacklo_v2r8(vgb,_fjsp_setzero_v2r8());
-            vgbsum           = _fjsp_add_v2r8(vgbsum,vgb);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 74 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vgbsum,kernel_data->energygrp_polarization+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-        dvdasum = _fjsp_mul_v2r8(dvdasum, _fjsp_mul_v2r8(isai0,isai0));
-        gmx_fjsp_update_1pot_v2r8(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 10 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*74);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp;
-    _fjsp_v2r8       minushalf = gmx_fjsp_set1_v2r8(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = gmx_fjsp_set1_v2r8(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = gmx_fjsp_set1_v2r8((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        isai0            = gmx_fjsp_load1_v2r8(invsqrta+inr+0);
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        dvdasum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            isaj0            = gmx_fjsp_load_2real_swizzle_v2r8(invsqrta+jnrA+0,invsqrta+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _fjsp_mul_v2r8(isai0,isaj0);
-            gbqqfactor       = _fjsp_neg_v2r8(_fjsp_mul_v2r8(qq00,_fjsp_mul_v2r8(isaprod,gbinvepsdiff)));
-            gbscale          = _fjsp_mul_v2r8(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _fjsp_mul_v2r8(r00,gbscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            gbeps            = _fjsp_sub_v2r8(rt,_fjsp_xtod_v2r8(itab_tmp));
-            _fjsp_store_v2r8(&gbconv.simd,itab_tmp);
-
-            Y                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[0] );
-            F                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[0] +2);
-            H                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(gbeps,_fjsp_madd_v2r8(gbeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(gbeps,Fp,Y);
-            vgb              = _fjsp_mul_v2r8(gbqqfactor,VV);
-
-            twogbeps         = _fjsp_add_v2r8(gbeps,gbeps);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twogbeps,H,G),gbeps,Fp);
-            fgb              = _fjsp_mul_v2r8(gbqqfactor,_fjsp_mul_v2r8(FF,gbscale));
-            dvdatmp          = _fjsp_mul_v2r8(minushalf,_fjsp_madd_v2r8(fgb,r00,vgb));
-            dvdasum          = _fjsp_add_v2r8(dvdasum,dvdatmp);
-            gmx_fjsp_increment_2real_swizzle_v2r8(dvda+jnrA,dvda+jnrB,_fjsp_mul_v2r8(dvdatmp,_fjsp_mul_v2r8(isaj0,isaj0)));
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(_fjsp_msub_v2r8(velec,rinv00,fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 67 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            isaj0            = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),invsqrta+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _fjsp_mul_v2r8(isai0,isaj0);
-            gbqqfactor       = _fjsp_neg_v2r8(_fjsp_mul_v2r8(qq00,_fjsp_mul_v2r8(isaprod,gbinvepsdiff)));
-            gbscale          = _fjsp_mul_v2r8(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _fjsp_mul_v2r8(r00,gbscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            gbeps            = _fjsp_sub_v2r8(rt,_fjsp_xtod_v2r8(itab_tmp));
-            _fjsp_store_v2r8(&gbconv.simd,itab_tmp);
-
-            Y                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(gbeps,_fjsp_madd_v2r8(gbeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(gbeps,Fp,Y);
-            vgb              = _fjsp_mul_v2r8(gbqqfactor,VV);
-
-            twogbeps         = _fjsp_add_v2r8(gbeps,gbeps);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twogbeps,H,G),gbeps,Fp);
-            fgb              = _fjsp_mul_v2r8(gbqqfactor,_fjsp_mul_v2r8(FF,gbscale));
-            dvdatmp          = _fjsp_mul_v2r8(minushalf,_fjsp_madd_v2r8(fgb,r00,vgb));
-            dvdasum          = _fjsp_add_v2r8(dvdasum,dvdatmp);
-            gmx_fjsp_increment_1real_v2r8(dvda+jnrA,_fjsp_mul_v2r8(dvdatmp,_fjsp_mul_v2r8(isaj0,isaj0)));
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(_fjsp_msub_v2r8(velec,rinv00,fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 67 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        dvdasum = _fjsp_mul_v2r8(dvdasum, _fjsp_mul_v2r8(isai0,isai0));
-        gmx_fjsp_update_1pot_v2r8(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*67);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_sparc64_hpc_ace_double.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_sparc64_hpc_ace_double.c
deleted file mode 100644
index 5d52647624..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_sparc64_hpc_ace_double.c
+++ /dev/null
@@ -1,633 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp;
-    _fjsp_v2r8       minushalf = gmx_fjsp_set1_v2r8(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = gmx_fjsp_set1_v2r8(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = gmx_fjsp_set1_v2r8((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        isai0            = gmx_fjsp_load1_v2r8(invsqrta+inr+0);
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vgbsum           = _fjsp_setzero_v2r8();
-        dvdasum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            isaj0            = gmx_fjsp_load_2real_swizzle_v2r8(invsqrta+jnrA+0,invsqrta+jnrB+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _fjsp_mul_v2r8(isai0,isaj0);
-            gbqqfactor       = _fjsp_neg_v2r8(_fjsp_mul_v2r8(qq00,_fjsp_mul_v2r8(isaprod,gbinvepsdiff)));
-            gbscale          = _fjsp_mul_v2r8(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _fjsp_mul_v2r8(r00,gbscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            gbeps            = _fjsp_sub_v2r8(rt,_fjsp_xtod_v2r8(itab_tmp));
-            _fjsp_store_v2r8(&gbconv.simd,itab_tmp);
-
-            Y                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[0] );
-            F                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[0] +2);
-            H                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(gbeps,_fjsp_madd_v2r8(gbeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(gbeps,Fp,Y);
-            vgb              = _fjsp_mul_v2r8(gbqqfactor,VV);
-
-            twogbeps         = _fjsp_add_v2r8(gbeps,gbeps);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twogbeps,H,G),gbeps,Fp);
-            fgb              = _fjsp_mul_v2r8(gbqqfactor,_fjsp_mul_v2r8(FF,gbscale));
-            dvdatmp          = _fjsp_mul_v2r8(minushalf,_fjsp_madd_v2r8(fgb,r00,vgb));
-            dvdasum          = _fjsp_add_v2r8(dvdasum,dvdatmp);
-            gmx_fjsp_increment_2real_swizzle_v2r8(dvda+jnrA,dvda+jnrB,_fjsp_mul_v2r8(dvdatmp,_fjsp_mul_v2r8(isaj0,isaj0)));
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(_fjsp_msub_v2r8(velec,rinv00,fgb),rinv00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vgbsum           = _fjsp_add_v2r8(vgbsum,vgb);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 61 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            isaj0            = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),invsqrta+jnrA+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _fjsp_mul_v2r8(isai0,isaj0);
-            gbqqfactor       = _fjsp_neg_v2r8(_fjsp_mul_v2r8(qq00,_fjsp_mul_v2r8(isaprod,gbinvepsdiff)));
-            gbscale          = _fjsp_mul_v2r8(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _fjsp_mul_v2r8(r00,gbscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            gbeps            = _fjsp_sub_v2r8(rt,_fjsp_xtod_v2r8(itab_tmp));
-            _fjsp_store_v2r8(&gbconv.simd,itab_tmp);
-
-            Y                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(gbeps,_fjsp_madd_v2r8(gbeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(gbeps,Fp,Y);
-            vgb              = _fjsp_mul_v2r8(gbqqfactor,VV);
-
-            twogbeps         = _fjsp_add_v2r8(gbeps,gbeps);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twogbeps,H,G),gbeps,Fp);
-            fgb              = _fjsp_mul_v2r8(gbqqfactor,_fjsp_mul_v2r8(FF,gbscale));
-            dvdatmp          = _fjsp_mul_v2r8(minushalf,_fjsp_madd_v2r8(fgb,r00,vgb));
-            dvdasum          = _fjsp_add_v2r8(dvdasum,dvdatmp);
-            gmx_fjsp_increment_1real_v2r8(dvda+jnrA,_fjsp_mul_v2r8(dvdatmp,_fjsp_mul_v2r8(isaj0,isaj0)));
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(_fjsp_msub_v2r8(velec,rinv00,fgb),rinv00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vgb              = _fjsp_unpacklo_v2r8(vgb,_fjsp_setzero_v2r8());
-            vgbsum           = _fjsp_add_v2r8(vgbsum,vgb);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 61 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vgbsum,kernel_data->energygrp_polarization+ggid);
-        dvdasum = _fjsp_mul_v2r8(dvdasum, _fjsp_mul_v2r8(isai0,isai0));
-        gmx_fjsp_update_1pot_v2r8(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 9 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*9 + inneriter*61);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp;
-    _fjsp_v2r8       minushalf = gmx_fjsp_set1_v2r8(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = gmx_fjsp_set1_v2r8(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = gmx_fjsp_set1_v2r8((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        isai0            = gmx_fjsp_load1_v2r8(invsqrta+inr+0);
-
-        dvdasum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            isaj0            = gmx_fjsp_load_2real_swizzle_v2r8(invsqrta+jnrA+0,invsqrta+jnrB+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _fjsp_mul_v2r8(isai0,isaj0);
-            gbqqfactor       = _fjsp_neg_v2r8(_fjsp_mul_v2r8(qq00,_fjsp_mul_v2r8(isaprod,gbinvepsdiff)));
-            gbscale          = _fjsp_mul_v2r8(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _fjsp_mul_v2r8(r00,gbscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            gbeps            = _fjsp_sub_v2r8(rt,_fjsp_xtod_v2r8(itab_tmp));
-            _fjsp_store_v2r8(&gbconv.simd,itab_tmp);
-
-            Y                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[0] );
-            F                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[0] +2);
-            H                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(gbeps,_fjsp_madd_v2r8(gbeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(gbeps,Fp,Y);
-            vgb              = _fjsp_mul_v2r8(gbqqfactor,VV);
-
-            twogbeps         = _fjsp_add_v2r8(gbeps,gbeps);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twogbeps,H,G),gbeps,Fp);
-            fgb              = _fjsp_mul_v2r8(gbqqfactor,_fjsp_mul_v2r8(FF,gbscale));
-            dvdatmp          = _fjsp_mul_v2r8(minushalf,_fjsp_madd_v2r8(fgb,r00,vgb));
-            dvdasum          = _fjsp_add_v2r8(dvdasum,dvdatmp);
-            gmx_fjsp_increment_2real_swizzle_v2r8(dvda+jnrA,dvda+jnrB,_fjsp_mul_v2r8(dvdatmp,_fjsp_mul_v2r8(isaj0,isaj0)));
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(_fjsp_msub_v2r8(velec,rinv00,fgb),rinv00);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 59 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            isaj0            = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),invsqrta+jnrA+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _fjsp_mul_v2r8(isai0,isaj0);
-            gbqqfactor       = _fjsp_neg_v2r8(_fjsp_mul_v2r8(qq00,_fjsp_mul_v2r8(isaprod,gbinvepsdiff)));
-            gbscale          = _fjsp_mul_v2r8(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _fjsp_mul_v2r8(r00,gbscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            gbeps            = _fjsp_sub_v2r8(rt,_fjsp_xtod_v2r8(itab_tmp));
-            _fjsp_store_v2r8(&gbconv.simd,itab_tmp);
-
-            Y                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(gbeps,_fjsp_madd_v2r8(gbeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(gbeps,Fp,Y);
-            vgb              = _fjsp_mul_v2r8(gbqqfactor,VV);
-
-            twogbeps         = _fjsp_add_v2r8(gbeps,gbeps);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twogbeps,H,G),gbeps,Fp);
-            fgb              = _fjsp_mul_v2r8(gbqqfactor,_fjsp_mul_v2r8(FF,gbscale));
-            dvdatmp          = _fjsp_mul_v2r8(minushalf,_fjsp_madd_v2r8(fgb,r00,vgb));
-            dvdasum          = _fjsp_add_v2r8(dvdasum,dvdatmp);
-            gmx_fjsp_increment_1real_v2r8(dvda+jnrA,_fjsp_mul_v2r8(dvdatmp,_fjsp_mul_v2r8(isaj0,isaj0)));
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(_fjsp_msub_v2r8(velec,rinv00,fgb),rinv00);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 59 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        dvdasum = _fjsp_mul_v2r8(dvdasum, _fjsp_mul_v2r8(isai0,isai0));
-        gmx_fjsp_update_1pot_v2r8(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*7 + inneriter*59);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.c
index 8b9f035c13..fe95d60267 100644
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.c
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.c
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -51,6 +51,36 @@ nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double;
 nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double;
 nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double;
 nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double;
 nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sparc64_hpc_ace_double;
 nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sparc64_hpc_ace_double;
 nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sparc64_hpc_ace_double;
@@ -141,72 +171,6 @@ nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double;
 nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double;
 nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double;
 nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double;
-nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double;
 nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double;
 nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double;
 nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double;
@@ -277,6 +241,36 @@ nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double;
 nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double;
 nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double;
 nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double;
 
 
 nb_kernel_info_t
@@ -294,6 +288,36 @@ nb_kernel_info_t
     { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
     { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
     { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "PotentialAndForce" },
     { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LJEwald", "None", "Water3Particle", "", "PotentialAndForce" },
@@ -384,72 +408,6 @@ nb_kernel_info_t
     { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "Force" },
     { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "PotentialAndForce" },
     { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
     { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
@@ -519,7 +477,37 @@ nb_kernel_info_t
     { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
     { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
     { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }
+    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }
 };
 
 int
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_template_sparc64_hpc_ace_double.pre b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_template_sparc64_hpc_ace_double.pre
index dd9cdf8e8c..d47dbd3303 100644
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_template_sparc64_hpc_ace_double.pre
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_template_sparc64_hpc_ace_double.pre
@@ -2,7 +2,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -128,11 +128,6 @@ void
     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
     real             *charge;
     /* #endif */
-    /* #if 'GeneralizedBorn' in KERNEL_ELEC */
-    _fjsp_v2r8       vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp;
-    _fjsp_v2r8       minushalf = gmx_fjsp_set1_v2r8(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    /* #endif */
     /* #if KERNEL_VDW != 'None' */
     int              nvdwtype;
     _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
@@ -141,7 +136,7 @@ void
     _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
     _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
     /* #endif */
-    /* #if 'Table' in KERNEL_ELEC or 'GeneralizedBorn' in KERNEL_ELEC or 'Table' in KERNEL_VDW */
+    /* #if 'Table' in KERNEL_ELEC or 'Table' in KERNEL_VDW */
     _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
     real             *vftab;
     /* #endif */
@@ -224,14 +219,6 @@ void
      /*     #endif */
     /* #endif */
 
-    /* #if KERNEL_ELEC=='GeneralizedBorn' */
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = gmx_fjsp_set1_v2r8(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = gmx_fjsp_set1_v2r8((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-    /* #endif */
-
     /* #if 'Water' in GEOMETRY_I */
     /* Setup water-specific parameters */
     inr              = nlist->iinr[0];
@@ -358,9 +345,6 @@ void
         /*     #for I in PARTICLES_ELEC_I */
         iq{I}              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+{I}));
         /*         #define OUTERFLOPS OUTERFLOPS+1 */
-        /*         #if KERNEL_ELEC=='GeneralizedBorn' */
-        isai{I}            = gmx_fjsp_load1_v2r8(invsqrta+inr+{I});
-        /*         #endif */
         /*     #endfor */
         /*     #for I in PARTICLES_VDW_I */
         vdwioffset{I}      = 2*nvdwtype*vdwtype[inr+{I}];
@@ -372,16 +356,10 @@ void
         /*     #if KERNEL_ELEC != 'None' */
         velecsum         = _fjsp_setzero_v2r8();
         /*     #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC */
-        vgbsum           = _fjsp_setzero_v2r8();
-        /*     #endif */
         /*     #if KERNEL_VDW != 'None' */
         vvdwsum          = _fjsp_setzero_v2r8();
         /*     #endif */
         /* #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */
-        dvdasum          = _fjsp_setzero_v2r8();
-        /*     #endif */
 
         /* #for ROUND in ['Loop','Epilogue'] */
 
@@ -488,13 +466,6 @@ void
             /*         #else */
             jq{J}              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+{J});
             /*         #endif */
-            /*         #if KERNEL_ELEC=='GeneralizedBorn' */
-            /*             #if ROUND =='Loop' */
-            isaj{J}            = gmx_fjsp_load_2real_swizzle_v2r8(invsqrta+jnrA+{J},invsqrta+jnrB+{J});
-            /*             #else */
-            isaj{J}            = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),invsqrta+jnrA+{J});
-            /*             #endif */
-            /*         #endif */
             /*     #endfor */
             /*     #for J in PARTICLES_VDW_J */
             vdwjidx{J}A        = 2*vdwtype[jnrA+{J}];
@@ -600,61 +571,6 @@ void
             /*                 #define INNERFLOPS INNERFLOPS+3 */
             /*             #endif */
 
-            /*         #elif KERNEL_ELEC=='GeneralizedBorn' */
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _fjsp_mul_v2r8(isai{I},isaj{J});
-            gbqqfactor       = _fjsp_neg_v2r8(_fjsp_mul_v2r8(qq{I}{J},_fjsp_mul_v2r8(isaprod,gbinvepsdiff)));
-            gbscale          = _fjsp_mul_v2r8(isaprod,gbtabscale);
-            /*             #define INNERFLOPS INNERFLOPS+5 */
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _fjsp_mul_v2r8(r{I}{J},gbscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            gbeps            = _fjsp_sub_v2r8(rt,_fjsp_xtod_v2r8(itab_tmp));
-            _fjsp_store_v2r8(&gbconv.simd,itab_tmp);
-
-            Y                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[0] );
-            /*             #if ROUND == 'Loop' */
-            F                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[1] );
-            /*             #else */
-            F                = _fjsp_setzero_v2r8();
-            /*             #endif */
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[0] +2);
-            /*             #if ROUND == 'Loop' */
-            H                = _fjsp_load_v2r8( gbtab + 4*gbconv.i[1] +2);
-            /*             #else */
-            H                = _fjsp_setzero_v2r8();
-            /*             #endif */
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(gbeps,_fjsp_madd_v2r8(gbeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(gbeps,Fp,Y);
-            vgb              = _fjsp_mul_v2r8(gbqqfactor,VV);
-            /*             #define INNERFLOPS INNERFLOPS+10 */
-
-            /*             #if 'Force' in KERNEL_VF */
-            twogbeps         = _fjsp_add_v2r8(gbeps,gbeps);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twogbeps,H,G),gbeps,Fp);
-            fgb              = _fjsp_mul_v2r8(gbqqfactor,_fjsp_mul_v2r8(FF,gbscale));
-            dvdatmp          = _fjsp_mul_v2r8(minushalf,_fjsp_madd_v2r8(fgb,r{I}{J},vgb));
-            dvdasum          = _fjsp_add_v2r8(dvdasum,dvdatmp);
-            /*             #if ROUND == 'Loop' */
-            gmx_fjsp_increment_2real_swizzle_v2r8(dvda+jnrA,dvda+jnrB,_fjsp_mul_v2r8(dvdatmp,_fjsp_mul_v2r8(isaj{J},isaj{J})));
-            /*             #else */
-            gmx_fjsp_increment_1real_v2r8(dvda+jnrA,_fjsp_mul_v2r8(dvdatmp,_fjsp_mul_v2r8(isaj{J},isaj{J})));
-            /*             #endif */
-            /*                 #define INNERFLOPS INNERFLOPS+13 */
-            /*             #endif */
-            velec            = _fjsp_mul_v2r8(qq{I}{J},rinv{I}{J});
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #if 'Force' in KERNEL_VF */
-            felec            = _fjsp_mul_v2r8(_fjsp_msub_v2r8(velec,rinv{I}{J},fgb),rinv{I}{J});
-            /*                 #define INNERFLOPS INNERFLOPS+3 */
-            /*             #endif */
-
             /*         #elif KERNEL_ELEC=='Ewald' */
             /* EWALD ELECTROSTATICS */
 
@@ -937,17 +853,6 @@ void
             /*             #endif */
             velecsum         = _fjsp_add_v2r8(velecsum,velec);
             /*             #define INNERFLOPS INNERFLOPS+1 */
-            /*             #if KERNEL_ELEC=='GeneralizedBorn' */
-            /*             #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */
-            vgb              = _fjsp_and_v2r8(vgb,cutoff_mask);
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #endif                                       */
-            /*             #if ROUND == 'Epilogue' */
-            vgb              = _fjsp_unpacklo_v2r8(vgb,_fjsp_setzero_v2r8());
-            /*             #endif */
-            vgbsum           = _fjsp_add_v2r8(vgbsum,vgb);
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #endif */
             /*         #endif */
             /*         #if 'vdw' in INTERACTION_FLAGS[I][J] */
             /*             #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */
@@ -1086,19 +991,11 @@ void
         gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
         /*         #define OUTERFLOPS OUTERFLOPS+1 */
         /*     #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC */
-        gmx_fjsp_update_1pot_v2r8(vgbsum,kernel_data->energygrp_polarization+ggid);
-        /*         #define OUTERFLOPS OUTERFLOPS+1 */
-        /*     #endif */
         /*     #if KERNEL_VDW != 'None' */
         gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
         /*         #define OUTERFLOPS OUTERFLOPS+1 */
         /*     #endif */
         /* #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */
-        dvdasum = _fjsp_mul_v2r8(dvdasum, _fjsp_mul_v2r8(isai{I},isai{I}));
-        gmx_fjsp_update_1pot_v2r8(dvdasum,dvda+inr);
-        /*     #endif */
 
         /* Increment number of inner iterations */
         inneriter                  += j_index_end - j_index_start;
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/make_nb_kernel_sse2_double.py b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/make_nb_kernel_sse2_double.py
index 0fb6d3c14c..ab3cce67ce 100755
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/make_nb_kernel_sse2_double.py
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/make_nb_kernel_sse2_double.py
@@ -2,7 +2,7 @@
 #
 # This file is part of the GROMACS molecular simulation package.
 #
-# Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
+# Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 # and including many others, as listed in the AUTHORS file in the
 # top-level source directory and at http://www.gromacs.org.
@@ -105,7 +105,6 @@ ElectrostaticsList = {
     'None'                    : [],
     'Coulomb'                 : ['rinv','rinvsq'],
     'ReactionField'           : ['rinv','rinvsq'],
-    'GeneralizedBorn'         : ['rinv','r'],
     'CubicSplineTable'        : ['rinv','r','table'],
     'Ewald'                   : ['rinv','rinvsq','r'],
 }
@@ -190,7 +189,6 @@ Abbreviation = {
     'Coulomb'                 : 'Coul',
     'Ewald'                   : 'Ew',
     'ReactionField'           : 'RF',
-    'GeneralizedBorn'         : 'GB',
     'CubicSplineTable'        : 'CSTab',
     'LennardJones'            : 'LJ',
     'Buckingham'              : 'Bham',
@@ -269,7 +267,7 @@ def KeepKernel(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,Kernel
         return 0
 
-    # No need for LJ-only water optimization, or water optimization with implicit solvent.
-    if('Water' in KernelGeom[0] and (KernelElec=='None' or 'GeneralizedBorn' in KernelElec)):
+    # No need for LJ-only water optimization.
+    if('Water' in KernelGeom[0] and KernelElec=='None'):
         return 0
 
     # Non-matching table settings are pointless
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse2_double.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse2_double.c
deleted file mode 100644
index 9f96a56a04..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse2_double.c
+++ /dev/null
@@ -1,838 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sse2_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_x86_sse2_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse2_double
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse2_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    __m128d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    __m128d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128d          velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128d          vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
-    __m128d          minushalf = _mm_set1_pd(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    __m128d          rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    __m128d          one_sixth   = _mm_set1_pd(1.0/6.0);
-    __m128d          one_twelfth = _mm_set1_pd(1.0/12.0);
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128d          rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m128d          dummy_mask,cutoff_mask;
-    __m128d          signbit   = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
-    __m128d          one     = _mm_set1_pd(1.0);
-    __m128d          two     = _mm_set1_pd(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_pd(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = _mm_set1_pd(kernel_data->table_vdw->scale);
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_pd(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm_setzero_pd();
-        fiy0             = _mm_setzero_pd();
-        fiz0             = _mm_setzero_pd();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_pd(facel,_mm_load1_pd(charge+inr+0));
-        isai0            = _mm_load1_pd(invsqrta+inr+0);
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _mm_setzero_pd();
-        vgbsum           = _mm_setzero_pd();
-        vvdwsum          = _mm_setzero_pd();
-        dvdasum          = _mm_setzero_pd();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_2ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = sse2_invsqrt_d(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_2real_swizzle_pd(charge+jnrA+0,charge+jnrB+0);
-            isaj0            = gmx_mm_load_2real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-            gmx_mm_load_2pair_swizzle_pd(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _mm_mul_pd(r00,vftabscale);
-            vfitab           = _mm_cvttpd_epi32(rt);
-            vfeps            = _mm_sub_pd(rt,_mm_cvtepi32_pd(vfitab));
-            vfitab           = _mm_slli_epi32(vfitab,3);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-            gbeps            = _mm_sub_pd(rt,_mm_cvtepi32_pd(gbitab));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,1) +2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(gbeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(gbeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(gbeps,Fp));
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_2real_swizzle_pd(dvda+jnrA,dvda+jnrB,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(velec,rinv00),fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,1) );
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,0) +2);
-            H                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,1) +2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(vfeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(vfeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(vfeps,Fp));
-            vvdw6            = _mm_mul_pd(c6_00,VV);
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(vfeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fvdw6            = _mm_mul_pd(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            vfitab           = _mm_add_epi32(vfitab,ifour);
-            Y                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,1) );
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,0) +2);
-            H                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,1) +2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(vfeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(vfeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(vfeps,Fp));
-            vvdw12           = _mm_mul_pd(c12_00,VV);
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(vfeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fvdw12           = _mm_mul_pd(c12_00,FF);
-            vvdw             = _mm_add_pd(vvdw12,vvdw6);
-            fvdw             = _mm_xor_pd(signbit,_mm_mul_pd(_mm_add_pd(fvdw6,fvdw12),_mm_mul_pd(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _mm_add_pd(velecsum,velec);
-            vgbsum           = _mm_add_pd(vgbsum,vgb);
-            vvdwsum          = _mm_add_pd(vvdwsum,vvdw);
-
-            fscal            = _mm_add_pd(felec,fvdw);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_pd(fscal,dx00);
-            ty               = _mm_mul_pd(fscal,dy00);
-            tz               = _mm_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_pd(fix0,tx);
-            fiy0             = _mm_add_pd(fiy0,ty);
-            fiz0             = _mm_add_pd(fiz0,tz);
-
-            gmx_mm_decrement_1rvec_2ptr_swizzle_pd(f+j_coord_offsetA,f+j_coord_offsetB,tx,ty,tz);
-
-            /* Inner loop uses 92 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_1ptr_swizzle_pd(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = sse2_invsqrt_d(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = _mm_load_sd(charge+jnrA+0);
-            isaj0            = _mm_load_sd(invsqrta+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-            gmx_mm_load_1pair_swizzle_pd(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _mm_mul_pd(r00,vftabscale);
-            vfitab           = _mm_cvttpd_epi32(rt);
-            vfeps            = _mm_sub_pd(rt,_mm_cvtepi32_pd(vfitab));
-            vfitab           = _mm_slli_epi32(vfitab,3);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-            gbeps            = _mm_sub_pd(rt,_mm_cvtepi32_pd(gbitab));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(gbeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(gbeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(gbeps,Fp));
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
-            dvdatmp          = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(velec,rinv00),fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,0) );
-            F                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,0) +2);
-            H                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(vfeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(vfeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(vfeps,Fp));
-            vvdw6            = _mm_mul_pd(c6_00,VV);
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(vfeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fvdw6            = _mm_mul_pd(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            vfitab           = _mm_add_epi32(vfitab,ifour);
-            Y                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,0) );
-            F                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,0) +2);
-            H                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(vfeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(vfeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(vfeps,Fp));
-            vvdw12           = _mm_mul_pd(c12_00,VV);
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(vfeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fvdw12           = _mm_mul_pd(c12_00,FF);
-            vvdw             = _mm_add_pd(vvdw12,vvdw6);
-            fvdw             = _mm_xor_pd(signbit,_mm_mul_pd(_mm_add_pd(fvdw6,fvdw12),_mm_mul_pd(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
-            velecsum         = _mm_add_pd(velecsum,velec);
-            vgb              = _mm_unpacklo_pd(vgb,_mm_setzero_pd());
-            vgbsum           = _mm_add_pd(vgbsum,vgb);
-            vvdw             = _mm_unpacklo_pd(vvdw,_mm_setzero_pd());
-            vvdwsum          = _mm_add_pd(vvdwsum,vvdw);
-
-            fscal            = _mm_add_pd(felec,fvdw);
-
-            fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_pd(fscal,dx00);
-            ty               = _mm_mul_pd(fscal,dy00);
-            tz               = _mm_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_pd(fix0,tx);
-            fiy0             = _mm_add_pd(fiy0,ty);
-            fiz0             = _mm_add_pd(fiz0,tz);
-
-            gmx_mm_decrement_1rvec_1ptr_swizzle_pd(f+j_coord_offsetA,tx,ty,tz);
-
-            /* Inner loop uses 92 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_pd(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_mm_update_1pot_pd(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_mm_update_1pot_pd(vgbsum,kernel_data->energygrp_polarization+ggid);
-        gmx_mm_update_1pot_pd(vvdwsum,kernel_data->energygrp_vdw+ggid);
-        dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai0,isai0));
-        gmx_mm_update_1pot_pd(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 10 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*92);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse2_double
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse2_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    __m128d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    __m128d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128d          velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128d          vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
-    __m128d          minushalf = _mm_set1_pd(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    __m128d          rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    __m128d          one_sixth   = _mm_set1_pd(1.0/6.0);
-    __m128d          one_twelfth = _mm_set1_pd(1.0/12.0);
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128d          rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m128d          dummy_mask,cutoff_mask;
-    __m128d          signbit   = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
-    __m128d          one     = _mm_set1_pd(1.0);
-    __m128d          two     = _mm_set1_pd(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_pd(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = _mm_set1_pd(kernel_data->table_vdw->scale);
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_pd(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm_setzero_pd();
-        fiy0             = _mm_setzero_pd();
-        fiz0             = _mm_setzero_pd();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_pd(facel,_mm_load1_pd(charge+inr+0));
-        isai0            = _mm_load1_pd(invsqrta+inr+0);
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        dvdasum          = _mm_setzero_pd();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_2ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = sse2_invsqrt_d(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_2real_swizzle_pd(charge+jnrA+0,charge+jnrB+0);
-            isaj0            = gmx_mm_load_2real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-            gmx_mm_load_2pair_swizzle_pd(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _mm_mul_pd(r00,vftabscale);
-            vfitab           = _mm_cvttpd_epi32(rt);
-            vfeps            = _mm_sub_pd(rt,_mm_cvtepi32_pd(vfitab));
-            vfitab           = _mm_slli_epi32(vfitab,3);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-            gbeps            = _mm_sub_pd(rt,_mm_cvtepi32_pd(gbitab));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,1) +2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(gbeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(gbeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(gbeps,Fp));
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_2real_swizzle_pd(dvda+jnrA,dvda+jnrB,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(velec,rinv00),fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,1) );
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,0) +2);
-            H                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,1) +2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(vfeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(vfeps,_mm_add_pd(G,Heps)));
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(vfeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fvdw6            = _mm_mul_pd(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            vfitab           = _mm_add_epi32(vfitab,ifour);
-            Y                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,1) );
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,0) +2);
-            H                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,1) +2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(vfeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(vfeps,_mm_add_pd(G,Heps)));
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(vfeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fvdw12           = _mm_mul_pd(c12_00,FF);
-            fvdw             = _mm_xor_pd(signbit,_mm_mul_pd(_mm_add_pd(fvdw6,fvdw12),_mm_mul_pd(vftabscale,rinv00)));
-
-            fscal            = _mm_add_pd(felec,fvdw);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_pd(fscal,dx00);
-            ty               = _mm_mul_pd(fscal,dy00);
-            tz               = _mm_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_pd(fix0,tx);
-            fiy0             = _mm_add_pd(fiy0,ty);
-            fiz0             = _mm_add_pd(fiz0,tz);
-
-            gmx_mm_decrement_1rvec_2ptr_swizzle_pd(f+j_coord_offsetA,f+j_coord_offsetB,tx,ty,tz);
-
-            /* Inner loop uses 82 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_1ptr_swizzle_pd(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = sse2_invsqrt_d(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = _mm_load_sd(charge+jnrA+0);
-            isaj0            = _mm_load_sd(invsqrta+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-            gmx_mm_load_1pair_swizzle_pd(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _mm_mul_pd(r00,vftabscale);
-            vfitab           = _mm_cvttpd_epi32(rt);
-            vfeps            = _mm_sub_pd(rt,_mm_cvtepi32_pd(vfitab));
-            vfitab           = _mm_slli_epi32(vfitab,3);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-            gbeps            = _mm_sub_pd(rt,_mm_cvtepi32_pd(gbitab));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(gbeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(gbeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(gbeps,Fp));
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
-            dvdatmp          = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(velec,rinv00),fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,0) );
-            F                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,0) +2);
-            H                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(vfeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(vfeps,_mm_add_pd(G,Heps)));
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(vfeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fvdw6            = _mm_mul_pd(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            vfitab           = _mm_add_epi32(vfitab,ifour);
-            Y                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,0) );
-            F                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,0) +2);
-            H                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(vfeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(vfeps,_mm_add_pd(G,Heps)));
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(vfeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fvdw12           = _mm_mul_pd(c12_00,FF);
-            fvdw             = _mm_xor_pd(signbit,_mm_mul_pd(_mm_add_pd(fvdw6,fvdw12),_mm_mul_pd(vftabscale,rinv00)));
-
-            fscal            = _mm_add_pd(felec,fvdw);
-
-            fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_pd(fscal,dx00);
-            ty               = _mm_mul_pd(fscal,dy00);
-            tz               = _mm_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_pd(fix0,tx);
-            fiy0             = _mm_add_pd(fiy0,ty);
-            fiz0             = _mm_add_pd(fiz0,tz);
-
-            gmx_mm_decrement_1rvec_1ptr_swizzle_pd(f+j_coord_offsetA,tx,ty,tz);
-
-            /* Inner loop uses 82 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_pd(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai0,isai0));
-        gmx_mm_update_1pot_pd(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*82);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sse2_double.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sse2_double.c
deleted file mode 100644
index 36956b410f..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sse2_double.c
+++ /dev/null
@@ -1,728 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sse2_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_x86_sse2_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse2_double
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse2_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    __m128d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    __m128d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128d          velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128d          vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
-    __m128d          minushalf = _mm_set1_pd(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    __m128d          rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    __m128d          one_sixth   = _mm_set1_pd(1.0/6.0);
-    __m128d          one_twelfth = _mm_set1_pd(1.0/12.0);
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128d          rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m128d          dummy_mask,cutoff_mask;
-    __m128d          signbit   = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
-    __m128d          one     = _mm_set1_pd(1.0);
-    __m128d          two     = _mm_set1_pd(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_pd(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_pd(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm_setzero_pd();
-        fiy0             = _mm_setzero_pd();
-        fiz0             = _mm_setzero_pd();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_pd(facel,_mm_load1_pd(charge+inr+0));
-        isai0            = _mm_load1_pd(invsqrta+inr+0);
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _mm_setzero_pd();
-        vgbsum           = _mm_setzero_pd();
-        vvdwsum          = _mm_setzero_pd();
-        dvdasum          = _mm_setzero_pd();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_2ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = sse2_invsqrt_d(rsq00);
-
-            rinvsq00         = _mm_mul_pd(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_2real_swizzle_pd(charge+jnrA+0,charge+jnrB+0);
-            isaj0            = gmx_mm_load_2real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-            gmx_mm_load_2pair_swizzle_pd(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-            gbeps            = _mm_sub_pd(rt,_mm_cvtepi32_pd(gbitab));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,1) +2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(gbeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(gbeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(gbeps,Fp));
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_2real_swizzle_pd(dvda+jnrA,dvda+jnrB,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(velec,rinv00),fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _mm_mul_pd(c6_00,rinvsix);
-            vvdw12           = _mm_mul_pd(c12_00,_mm_mul_pd(rinvsix,rinvsix));
-            vvdw             = _mm_sub_pd( _mm_mul_pd(vvdw12,one_twelfth) , _mm_mul_pd(vvdw6,one_sixth) );
-            fvdw             = _mm_mul_pd(_mm_sub_pd(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _mm_add_pd(velecsum,velec);
-            vgbsum           = _mm_add_pd(vgbsum,vgb);
-            vvdwsum          = _mm_add_pd(vvdwsum,vvdw);
-
-            fscal            = _mm_add_pd(felec,fvdw);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_pd(fscal,dx00);
-            ty               = _mm_mul_pd(fscal,dy00);
-            tz               = _mm_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_pd(fix0,tx);
-            fiy0             = _mm_add_pd(fiy0,ty);
-            fiz0             = _mm_add_pd(fiz0,tz);
-
-            gmx_mm_decrement_1rvec_2ptr_swizzle_pd(f+j_coord_offsetA,f+j_coord_offsetB,tx,ty,tz);
-
-            /* Inner loop uses 71 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_1ptr_swizzle_pd(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = sse2_invsqrt_d(rsq00);
-
-            rinvsq00         = _mm_mul_pd(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _mm_load_sd(charge+jnrA+0);
-            isaj0            = _mm_load_sd(invsqrta+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-            gmx_mm_load_1pair_swizzle_pd(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-            gbeps            = _mm_sub_pd(rt,_mm_cvtepi32_pd(gbitab));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(gbeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(gbeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(gbeps,Fp));
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
-            dvdatmp          = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(velec,rinv00),fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _mm_mul_pd(c6_00,rinvsix);
-            vvdw12           = _mm_mul_pd(c12_00,_mm_mul_pd(rinvsix,rinvsix));
-            vvdw             = _mm_sub_pd( _mm_mul_pd(vvdw12,one_twelfth) , _mm_mul_pd(vvdw6,one_sixth) );
-            fvdw             = _mm_mul_pd(_mm_sub_pd(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
-            velecsum         = _mm_add_pd(velecsum,velec);
-            vgb              = _mm_unpacklo_pd(vgb,_mm_setzero_pd());
-            vgbsum           = _mm_add_pd(vgbsum,vgb);
-            vvdw             = _mm_unpacklo_pd(vvdw,_mm_setzero_pd());
-            vvdwsum          = _mm_add_pd(vvdwsum,vvdw);
-
-            fscal            = _mm_add_pd(felec,fvdw);
-
-            fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_pd(fscal,dx00);
-            ty               = _mm_mul_pd(fscal,dy00);
-            tz               = _mm_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_pd(fix0,tx);
-            fiy0             = _mm_add_pd(fiy0,ty);
-            fiz0             = _mm_add_pd(fiz0,tz);
-
-            gmx_mm_decrement_1rvec_1ptr_swizzle_pd(f+j_coord_offsetA,tx,ty,tz);
-
-            /* Inner loop uses 71 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_pd(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_mm_update_1pot_pd(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_mm_update_1pot_pd(vgbsum,kernel_data->energygrp_polarization+ggid);
-        gmx_mm_update_1pot_pd(vvdwsum,kernel_data->energygrp_vdw+ggid);
-        dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai0,isai0));
-        gmx_mm_update_1pot_pd(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 10 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*71);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse2_double
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse2_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    __m128d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    __m128d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128d          velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128d          vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
-    __m128d          minushalf = _mm_set1_pd(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    __m128d          rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    __m128d          one_sixth   = _mm_set1_pd(1.0/6.0);
-    __m128d          one_twelfth = _mm_set1_pd(1.0/12.0);
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128d          rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m128d          dummy_mask,cutoff_mask;
-    __m128d          signbit   = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
-    __m128d          one     = _mm_set1_pd(1.0);
-    __m128d          two     = _mm_set1_pd(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_pd(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_pd(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm_setzero_pd();
-        fiy0             = _mm_setzero_pd();
-        fiz0             = _mm_setzero_pd();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_pd(facel,_mm_load1_pd(charge+inr+0));
-        isai0            = _mm_load1_pd(invsqrta+inr+0);
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        dvdasum          = _mm_setzero_pd();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_2ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = sse2_invsqrt_d(rsq00);
-
-            rinvsq00         = _mm_mul_pd(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_2real_swizzle_pd(charge+jnrA+0,charge+jnrB+0);
-            isaj0            = gmx_mm_load_2real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-            gmx_mm_load_2pair_swizzle_pd(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-            gbeps            = _mm_sub_pd(rt,_mm_cvtepi32_pd(gbitab));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,1) +2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(gbeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(gbeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(gbeps,Fp));
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_2real_swizzle_pd(dvda+jnrA,dvda+jnrB,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(velec,rinv00),fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(c12_00,rinvsix),c6_00),_mm_mul_pd(rinvsix,rinvsq00));
-
-            fscal            = _mm_add_pd(felec,fvdw);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_pd(fscal,dx00);
-            ty               = _mm_mul_pd(fscal,dy00);
-            tz               = _mm_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_pd(fix0,tx);
-            fiy0             = _mm_add_pd(fiy0,ty);
-            fiz0             = _mm_add_pd(fiz0,tz);
-
-            gmx_mm_decrement_1rvec_2ptr_swizzle_pd(f+j_coord_offsetA,f+j_coord_offsetB,tx,ty,tz);
-
-            /* Inner loop uses 64 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_1ptr_swizzle_pd(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = sse2_invsqrt_d(rsq00);
-
-            rinvsq00         = _mm_mul_pd(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _mm_load_sd(charge+jnrA+0);
-            isaj0            = _mm_load_sd(invsqrta+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-            gmx_mm_load_1pair_swizzle_pd(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-            gbeps            = _mm_sub_pd(rt,_mm_cvtepi32_pd(gbitab));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(gbeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(gbeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(gbeps,Fp));
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
-            dvdatmp          = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(velec,rinv00),fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(c12_00,rinvsix),c6_00),_mm_mul_pd(rinvsix,rinvsq00));
-
-            fscal            = _mm_add_pd(felec,fvdw);
-
-            fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_pd(fscal,dx00);
-            ty               = _mm_mul_pd(fscal,dy00);
-            tz               = _mm_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_pd(fix0,tx);
-            fiy0             = _mm_add_pd(fiy0,ty);
-            fiz0             = _mm_add_pd(fiz0,tz);
-
-            gmx_mm_decrement_1rvec_1ptr_swizzle_pd(f+j_coord_offsetA,tx,ty,tz);
-
-            /* Inner loop uses 64 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_pd(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai0,isai0));
-        gmx_mm_update_1pot_pd(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*64);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_sse2_double.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_sse2_double.c
deleted file mode 100644
index b8c4480865..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_sse2_double.c
+++ /dev/null
@@ -1,657 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sse2_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_x86_sse2_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse2_double
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse2_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    __m128d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    __m128d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128d          velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128d          vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
-    __m128d          minushalf = _mm_set1_pd(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128d          rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m128d          dummy_mask,cutoff_mask;
-    __m128d          signbit   = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
-    __m128d          one     = _mm_set1_pd(1.0);
-    __m128d          two     = _mm_set1_pd(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_pd(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_pd(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm_setzero_pd();
-        fiy0             = _mm_setzero_pd();
-        fiz0             = _mm_setzero_pd();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_pd(facel,_mm_load1_pd(charge+inr+0));
-        isai0            = _mm_load1_pd(invsqrta+inr+0);
-
-        /* Reset potential sums */
-        velecsum         = _mm_setzero_pd();
-        vgbsum           = _mm_setzero_pd();
-        dvdasum          = _mm_setzero_pd();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_2ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = sse2_invsqrt_d(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_2real_swizzle_pd(charge+jnrA+0,charge+jnrB+0);
-            isaj0            = gmx_mm_load_2real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-            gbeps            = _mm_sub_pd(rt,_mm_cvtepi32_pd(gbitab));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,1) +2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(gbeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(gbeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(gbeps,Fp));
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_2real_swizzle_pd(dvda+jnrA,dvda+jnrB,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(velec,rinv00),fgb),rinv00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _mm_add_pd(velecsum,velec);
-            vgbsum           = _mm_add_pd(vgbsum,vgb);
-
-            fscal            = felec;
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_pd(fscal,dx00);
-            ty               = _mm_mul_pd(fscal,dy00);
-            tz               = _mm_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_pd(fix0,tx);
-            fiy0             = _mm_add_pd(fiy0,ty);
-            fiz0             = _mm_add_pd(fiz0,tz);
-
-            gmx_mm_decrement_1rvec_2ptr_swizzle_pd(f+j_coord_offsetA,f+j_coord_offsetB,tx,ty,tz);
-
-            /* Inner loop uses 58 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_1ptr_swizzle_pd(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = sse2_invsqrt_d(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = _mm_load_sd(charge+jnrA+0);
-            isaj0            = _mm_load_sd(invsqrta+jnrA+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-            gbeps            = _mm_sub_pd(rt,_mm_cvtepi32_pd(gbitab));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(gbeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(gbeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(gbeps,Fp));
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
-            dvdatmp          = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(velec,rinv00),fgb),rinv00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
-            velecsum         = _mm_add_pd(velecsum,velec);
-            vgb              = _mm_unpacklo_pd(vgb,_mm_setzero_pd());
-            vgbsum           = _mm_add_pd(vgbsum,vgb);
-
-            fscal            = felec;
-
-            fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_pd(fscal,dx00);
-            ty               = _mm_mul_pd(fscal,dy00);
-            tz               = _mm_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_pd(fix0,tx);
-            fiy0             = _mm_add_pd(fiy0,ty);
-            fiz0             = _mm_add_pd(fiz0,tz);
-
-            gmx_mm_decrement_1rvec_1ptr_swizzle_pd(f+j_coord_offsetA,tx,ty,tz);
-
-            /* Inner loop uses 58 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_pd(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_mm_update_1pot_pd(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_mm_update_1pot_pd(vgbsum,kernel_data->energygrp_polarization+ggid);
-        dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai0,isai0));
-        gmx_mm_update_1pot_pd(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 9 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*9 + inneriter*58);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse2_double
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse2_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    __m128d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    __m128d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128d          velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128d          vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
-    __m128d          minushalf = _mm_set1_pd(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128d          rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m128d          dummy_mask,cutoff_mask;
-    __m128d          signbit   = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
-    __m128d          one     = _mm_set1_pd(1.0);
-    __m128d          two     = _mm_set1_pd(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_pd(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_pd(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm_setzero_pd();
-        fiy0             = _mm_setzero_pd();
-        fiz0             = _mm_setzero_pd();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_pd(facel,_mm_load1_pd(charge+inr+0));
-        isai0            = _mm_load1_pd(invsqrta+inr+0);
-
-        dvdasum          = _mm_setzero_pd();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_2ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = sse2_invsqrt_d(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_2real_swizzle_pd(charge+jnrA+0,charge+jnrB+0);
-            isaj0            = gmx_mm_load_2real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-            gbeps            = _mm_sub_pd(rt,_mm_cvtepi32_pd(gbitab));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,1) +2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(gbeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(gbeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(gbeps,Fp));
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_2real_swizzle_pd(dvda+jnrA,dvda+jnrB,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(velec,rinv00),fgb),rinv00);
-
-            fscal            = felec;
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_pd(fscal,dx00);
-            ty               = _mm_mul_pd(fscal,dy00);
-            tz               = _mm_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_pd(fix0,tx);
-            fiy0             = _mm_add_pd(fiy0,ty);
-            fiz0             = _mm_add_pd(fiz0,tz);
-
-            gmx_mm_decrement_1rvec_2ptr_swizzle_pd(f+j_coord_offsetA,f+j_coord_offsetB,tx,ty,tz);
-
-            /* Inner loop uses 56 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_1ptr_swizzle_pd(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = sse2_invsqrt_d(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = _mm_load_sd(charge+jnrA+0);
-            isaj0            = _mm_load_sd(invsqrta+jnrA+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-            gbeps            = _mm_sub_pd(rt,_mm_cvtepi32_pd(gbitab));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(gbeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(gbeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(gbeps,Fp));
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
-            dvdatmp          = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(velec,rinv00),fgb),rinv00);
-
-            fscal            = felec;
-
-            fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_pd(fscal,dx00);
-            ty               = _mm_mul_pd(fscal,dy00);
-            tz               = _mm_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_pd(fix0,tx);
-            fiy0             = _mm_add_pd(fiy0,ty);
-            fiz0             = _mm_add_pd(fiz0,tz);
-
-            gmx_mm_decrement_1rvec_1ptr_swizzle_pd(f+j_coord_offsetA,tx,ty,tz);
-
-            /* Inner loop uses 56 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_pd(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai0,isai0));
-        gmx_mm_update_1pot_pd(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*7 + inneriter*56);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_sse2_double.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_sse2_double.c
index 29bd977bac..ba3fb0cb56 100644
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_sse2_double.c
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_sse2_double.c
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -51,6 +51,36 @@ nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sse2_double;
 nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sse2_double;
 nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sse2_double;
 nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sse2_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse2_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse2_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse2_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse2_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse2_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse2_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse2_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse2_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse2_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse2_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse2_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse2_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse2_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse2_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse2_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse2_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse2_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse2_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse2_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse2_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse2_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse2_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse2_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse2_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse2_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse2_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse2_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse2_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse2_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse2_double;
 nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sse2_double;
 nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sse2_double;
 nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sse2_double;
@@ -141,72 +171,6 @@ nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sse2_double;
 nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sse2_double;
 nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sse2_double;
 nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sse2_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse2_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse2_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse2_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse2_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse2_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse2_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse2_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse2_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse2_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse2_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse2_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse2_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse2_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse2_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse2_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse2_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse2_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse2_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse2_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse2_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse2_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse2_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse2_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse2_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse2_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse2_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse2_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse2_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse2_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse2_double;
-nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse2_double;
-nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse2_double;
-nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse2_double;
-nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse2_double;
 nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sse2_double;
 nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sse2_double;
 nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sse2_double;
@@ -277,6 +241,36 @@ nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sse2_double;
 nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sse2_double;
 nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sse2_double;
 nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse2_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse2_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse2_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse2_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse2_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse2_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse2_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse2_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse2_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse2_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse2_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse2_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse2_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse2_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse2_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse2_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse2_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse2_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse2_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse2_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse2_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse2_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse2_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse2_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse2_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse2_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse2_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse2_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse2_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse2_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse2_double;
 
 
 nb_kernel_info_t
@@ -294,6 +288,36 @@ nb_kernel_info_t
     { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sse2_double, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sse2_double", "sse2_double", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sse2_double, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sse2_double", "sse2_double", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
     { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sse2_double, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sse2_double", "sse2_double", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse2_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse2_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse2_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse2_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse2_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse2_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse2_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse2_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse2_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse2_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse2_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse2_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse2_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse2_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse2_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse2_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse2_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse2_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse2_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse2_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse2_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse2_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse2_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse2_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse2_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse2_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse2_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse2_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse2_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse2_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
     { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sse2_double, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sse2_double", "sse2_double", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "PotentialAndForce" },
     { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sse2_double, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sse2_double", "sse2_double", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sse2_double, "nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sse2_double", "sse2_double", "Ewald", "None", "LJEwald", "None", "Water3Particle", "", "PotentialAndForce" },
@@ -384,72 +408,6 @@ nb_kernel_info_t
     { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sse2_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sse2_double", "sse2_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "Force" },
     { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sse2_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sse2_double", "sse2_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "PotentialAndForce" },
     { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sse2_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sse2_double", "sse2_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse2_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse2_double", "sse2_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse2_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse2_double", "sse2_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse2_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse2_double", "sse2_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse2_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse2_double", "sse2_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse2_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse2_double", "sse2_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse2_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse2_double", "sse2_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse2_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse2_double", "sse2_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse2_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse2_double", "sse2_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse2_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse2_double", "sse2_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse2_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse2_double", "sse2_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse2_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse2_double", "sse2_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse2_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse2_double", "sse2_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse2_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse2_double", "sse2_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse2_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse2_double", "sse2_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse2_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse2_double", "sse2_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse2_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse2_double", "sse2_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse2_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse2_double", "sse2_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse2_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse2_double", "sse2_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse2_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse2_double", "sse2_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse2_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse2_double", "sse2_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse2_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse2_double", "sse2_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse2_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse2_double", "sse2_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse2_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse2_double", "sse2_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse2_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse2_double", "sse2_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse2_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse2_double", "sse2_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse2_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse2_double", "sse2_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse2_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse2_double", "sse2_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse2_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse2_double", "sse2_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse2_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse2_double", "sse2_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse2_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse2_double", "sse2_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse2_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse2_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse2_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse2_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse2_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse2_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse2_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse2_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse2_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse2_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse2_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse2_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse2_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse2_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse2_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse2_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse2_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse2_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse2_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse2_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse2_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse2_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse2_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse2_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse2_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse2_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse2_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse2_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse2_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse2_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse2_double, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse2_double", "sse2_double", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse2_double, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse2_double", "sse2_double", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse2_double, "nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse2_double", "sse2_double", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse2_double, "nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse2_double", "sse2_double", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse2_double, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse2_double", "sse2_double", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse2_double, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse2_double", "sse2_double", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sse2_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
     { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sse2_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sse2_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
@@ -519,7 +477,37 @@ nb_kernel_info_t
     { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sse2_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sse2_double", "sse2_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
     { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sse2_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sse2_double", "sse2_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
     { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sse2_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sse2_double", "sse2_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse2_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse2_double", "sse2_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }
+    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse2_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse2_double", "sse2_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse2_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse2_double", "sse2_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse2_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse2_double", "sse2_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse2_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse2_double", "sse2_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse2_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse2_double", "sse2_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse2_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse2_double", "sse2_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse2_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse2_double", "sse2_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse2_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse2_double", "sse2_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse2_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse2_double", "sse2_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse2_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse2_double", "sse2_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse2_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse2_double", "sse2_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse2_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse2_double", "sse2_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse2_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse2_double", "sse2_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse2_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse2_double", "sse2_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse2_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse2_double", "sse2_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse2_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse2_double", "sse2_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse2_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse2_double", "sse2_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse2_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse2_double", "sse2_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse2_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse2_double", "sse2_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse2_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse2_double", "sse2_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse2_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse2_double", "sse2_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse2_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse2_double", "sse2_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse2_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse2_double", "sse2_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse2_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse2_double", "sse2_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse2_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse2_double", "sse2_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse2_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse2_double", "sse2_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse2_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse2_double", "sse2_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse2_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse2_double", "sse2_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse2_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse2_double", "sse2_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse2_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse2_double", "sse2_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse2_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse2_double", "sse2_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }
 };
 
 int
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_template_sse2_double.pre b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_template_sse2_double.pre
index 15932a11d2..a59b3432e8 100644
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_template_sse2_double.pre
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_template_sse2_double.pre
@@ -2,7 +2,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -128,12 +128,6 @@ void
     __m128d          velec,felec,velecsum,facel,crf,krf,krf2;
     real             *charge;
     /* #endif */
-    /* #if 'GeneralizedBorn' in KERNEL_ELEC */
-    __m128i          gbitab;
-    __m128d          vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
-    __m128d          minushalf = _mm_set1_pd(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    /* #endif */
     /* #if KERNEL_VDW != 'None' */
     int              nvdwtype;
     __m128d          rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
@@ -142,7 +136,7 @@ void
     __m128d          one_sixth   = _mm_set1_pd(1.0/6.0);
     __m128d          one_twelfth = _mm_set1_pd(1.0/12.0);
     /* #endif */
-    /* #if 'Table' in KERNEL_ELEC or 'GeneralizedBorn' in KERNEL_ELEC or 'Table' in KERNEL_VDW */
+    /* #if 'Table' in KERNEL_ELEC or 'Table' in KERNEL_VDW */
     __m128i          vfitab;
     __m128i          ifour       = _mm_set1_epi32(4);
     __m128d          rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
@@ -226,14 +220,6 @@ void
      /*     #endif */
     /* #endif */
 
-    /* #if KERNEL_ELEC=='GeneralizedBorn' */
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_pd(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-    /* #endif */
-
     /* #if 'Water' in GEOMETRY_I */
     /* Setup water-specific parameters */
     inr              = nlist->iinr[0];
@@ -360,9 +346,6 @@ void
         /*     #for I in PARTICLES_ELEC_I */
         iq{I}              = _mm_mul_pd(facel,_mm_load1_pd(charge+inr+{I}));
         /*         #define OUTERFLOPS OUTERFLOPS+1 */
-        /*         #if KERNEL_ELEC=='GeneralizedBorn' */
-        isai{I}            = _mm_load1_pd(invsqrta+inr+{I});
-        /*         #endif */
         /*     #endfor */
         /*     #for I in PARTICLES_VDW_I */
         vdwioffset{I}      = 2*nvdwtype*vdwtype[inr+{I}];
@@ -374,16 +357,10 @@ void
         /*     #if KERNEL_ELEC != 'None' */
         velecsum         = _mm_setzero_pd();
         /*     #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC */
-        vgbsum           = _mm_setzero_pd();
-        /*     #endif */
         /*     #if KERNEL_VDW != 'None' */
         vvdwsum          = _mm_setzero_pd();
         /*     #endif */
         /* #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */
-        dvdasum          = _mm_setzero_pd();
-        /*     #endif */
 
         /* #for ROUND in ['Loop','Epilogue'] */
 
@@ -490,13 +467,6 @@ void
             /*         #else */
             jq{J}              = _mm_load_sd(charge+jnrA+{J});
             /*         #endif */
-            /*         #if KERNEL_ELEC=='GeneralizedBorn' */
-            /*             #if ROUND =='Loop' */
-            isaj{J}            = gmx_mm_load_2real_swizzle_pd(invsqrta+jnrA+{J},invsqrta+jnrB+{J});
-            /*             #else */
-            isaj{J}            = _mm_load_sd(invsqrta+jnrA+{J});
-            /*             #endif */
-            /*         #endif */
             /*     #endfor */
             /*     #for J in PARTICLES_VDW_J */
             vdwjidx{J}A        = 2*vdwtype[jnrA+{J}];
@@ -604,64 +574,6 @@ void
             /*                 #define INNERFLOPS INNERFLOPS+3 */
             /*             #endif */
 
-            /*         #elif KERNEL_ELEC=='GeneralizedBorn' */
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai{I},isaj{J});
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq{I}{J},_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-            /*             #define INNERFLOPS INNERFLOPS+5 */
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r{I}{J},gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-            gbeps            = _mm_sub_pd(rt,_mm_cvtepi32_pd(gbitab));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            /*             #if ROUND == 'Loop' */
-            F                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            /*             #else */
-            F                = _mm_setzero_pd();
-            /*             #endif */
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) +2);
-            /*             #if ROUND == 'Loop' */
-            H                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,1) +2);
-            /*             #else */
-            H                = _mm_setzero_pd();
-            /*             #endif */
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(gbeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(gbeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(gbeps,Fp));
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-            /*             #define INNERFLOPS INNERFLOPS+10 */
-
-            /*             #if 'Force' in KERNEL_VF */
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r{I}{J})));
-            /*                 #if ROUND == 'Epilogue' */
-            dvdatmp          = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
-            /*                 #endif */
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            /*             #if ROUND == 'Loop' */
-            gmx_mm_increment_2real_swizzle_pd(dvda+jnrA,dvda+jnrB,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj{J},isaj{J})));
-            /*             #else */
-            gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj{J},isaj{J})));
-            /*             #endif */
-            /*                 #define INNERFLOPS INNERFLOPS+13 */
-            /*             #endif */
-            velec            = _mm_mul_pd(qq{I}{J},rinv{I}{J});
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #if 'Force' in KERNEL_VF */
-            felec            = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(velec,rinv{I}{J}),fgb),rinv{I}{J});
-            /*                 #define INNERFLOPS INNERFLOPS+3 */
-            /*             #endif */
-
             /*         #elif KERNEL_ELEC=='Ewald' */
             /* EWALD ELECTROSTATICS */
 
@@ -946,17 +858,6 @@ void
             /*             #endif */
             velecsum         = _mm_add_pd(velecsum,velec);
             /*             #define INNERFLOPS INNERFLOPS+1 */
-            /*             #if KERNEL_ELEC=='GeneralizedBorn' */
-            /*             #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */
-            vgb              = _mm_and_pd(vgb,cutoff_mask);
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #endif                                       */
-            /*             #if ROUND == 'Epilogue' */
-            vgb              = _mm_unpacklo_pd(vgb,_mm_setzero_pd());
-            /*             #endif */
-            vgbsum           = _mm_add_pd(vgbsum,vgb);
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #endif */
             /*         #endif */
             /*         #if 'vdw' in INTERACTION_FLAGS[I][J] */
             /*             #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */
@@ -1097,19 +998,11 @@ void
         gmx_mm_update_1pot_pd(velecsum,kernel_data->energygrp_elec+ggid);
         /*         #define OUTERFLOPS OUTERFLOPS+1 */
         /*     #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC */
-        gmx_mm_update_1pot_pd(vgbsum,kernel_data->energygrp_polarization+ggid);
-        /*         #define OUTERFLOPS OUTERFLOPS+1 */
-        /*     #endif */
         /*     #if KERNEL_VDW != 'None' */
         gmx_mm_update_1pot_pd(vvdwsum,kernel_data->energygrp_vdw+ggid);
         /*         #define OUTERFLOPS OUTERFLOPS+1 */
         /*     #endif */
         /* #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */
-        dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai{I},isai{I}));
-        gmx_mm_update_1pot_pd(dvdasum,dvda+inr);
-        /*     #endif */
 
         /* Increment number of inner iterations */
         inneriter                  += j_index_end - j_index_start;
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/make_nb_kernel_sse2_single.py b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/make_nb_kernel_sse2_single.py
index 1fa8fee809..02701e1ac3 100755
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/make_nb_kernel_sse2_single.py
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/make_nb_kernel_sse2_single.py
@@ -2,7 +2,7 @@
 #
 # This file is part of the GROMACS molecular simulation package.
 #
-# Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
+# Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 # and including many others, as listed in the AUTHORS file in the
 # top-level source directory and at http://www.gromacs.org.
@@ -105,7 +105,6 @@ ElectrostaticsList = {
     'None'                    : [],
     'Coulomb'                 : ['rinv','rinvsq'],
     'ReactionField'           : ['rinv','rinvsq'],
-    'GeneralizedBorn'         : ['rinv','r'],
     'CubicSplineTable'        : ['rinv','r','table'],
     'Ewald'                   : ['rinv','rinvsq','r'],
 }
@@ -190,7 +189,6 @@ Abbreviation = {
     'Coulomb'                 : 'Coul',
     'Ewald'                   : 'Ew',
     'ReactionField'           : 'RF',
-    'GeneralizedBorn'         : 'GB',
     'CubicSplineTable'        : 'CSTab',
     'LennardJones'            : 'LJ',
     'Buckingham'              : 'Bham',
@@ -269,7 +267,7 @@ def KeepKernel(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,Kernel
         return 0
 
     # No need for LJ-only water optimization, or water optimization with implicit solvent.
-    if('Water' in KernelGeom[0] and (KernelElec=='None' or 'GeneralizedBorn' in KernelElec)):
+    if('Water' in KernelGeom[0] and KernelElec=='None'):
         return 0
 
     # Non-matching table settings are pointless
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse2_single.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse2_single.c
deleted file mode 100644
index 2dd1b08824..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse2_single.c
+++ /dev/null
@@ -1,958 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sse2_single kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_x86_sse2_single.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse2_single
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse2_single
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or 
-     * just 0 for non-waters.
-     * Suffixes A,B,C,D refer to j loop unrolling done with SSE, e.g. for the four different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB,jnrC,jnrD;
-    int              jnrlistA,jnrlistB,jnrlistC,jnrlistD;
-    int              j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    real             *fjptrA,*fjptrB,*fjptrC,*fjptrD;
-    real             scratch[4*DIM];
-    __m128           tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128           ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D;
-    __m128           jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128           dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128           velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128           vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp;
-    __m128           minushalf = _mm_set1_ps(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    __m128           rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    __m128           one_sixth   = _mm_set1_ps(1.0/6.0);
-    __m128           one_twelfth = _mm_set1_ps(1.0/12.0);
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128           rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m128           dummy_mask,cutoff_mask;
-    __m128           signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) );
-    __m128           one     = _mm_set1_ps(1.0);
-    __m128           two     = _mm_set1_ps(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_ps(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = _mm_set1_ps(kernel_data->table_vdw->scale);
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_ps(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = jnrC = jnrD = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-    j_coord_offsetC = 0;
-    j_coord_offsetD = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    for(iidx=0;iidx<4*DIM;iidx++)
-    {
-        scratch[iidx] = 0.0;
-    }  
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_ps(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-        
-        fix0             = _mm_setzero_ps();
-        fiy0             = _mm_setzero_ps();
-        fiz0             = _mm_setzero_ps();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_ps(facel,_mm_load1_ps(charge+inr+0));
-        isai0            = _mm_load1_ps(invsqrta+inr+0);
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _mm_setzero_ps();
-        vgbsum           = _mm_setzero_ps();
-        vvdwsum          = _mm_setzero_ps();
-        dvdasum          = _mm_setzero_ps();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end && jjnr[jidx+3]>=0; jidx+=4)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            jnrC             = jjnr[jidx+2];
-            jnrD             = jjnr[jidx+3];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = sse2_invsqrt_f(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-            gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,
-                                         vdwparam+vdwioffset0+vdwjidx0C,
-                                         vdwparam+vdwioffset0+vdwjidx0D,
-                                         &c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _mm_mul_ps(r00,vftabscale);
-            vfitab           = _mm_cvttps_epi32(rt);
-            vfeps            = _mm_sub_ps(rt,_mm_cvtepi32_ps(vfitab));
-            vfitab           = _mm_slli_epi32(vfitab,3);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-            gbeps            = _mm_sub_ps(rt,_mm_cvtepi32_ps(gbitab));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(gbeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp));
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,1) );
-            G                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,2) );
-            H                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(vfeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(vfeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(vfeps,Fp));
-            vvdw6            = _mm_mul_ps(c6_00,VV);
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(vfeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fvdw6            = _mm_mul_ps(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            vfitab           = _mm_add_epi32(vfitab,ifour);
-            Y                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,1) );
-            G                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,2) );
-            H                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(vfeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(vfeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(vfeps,Fp));
-            vvdw12           = _mm_mul_ps(c12_00,VV);
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(vfeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fvdw12           = _mm_mul_ps(c12_00,FF);
-            vvdw             = _mm_add_ps(vvdw12,vvdw6);
-            fvdw             = _mm_xor_ps(signbit,_mm_mul_ps(_mm_add_ps(fvdw6,fvdw12),_mm_mul_ps(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _mm_add_ps(velecsum,velec);
-            vgbsum           = _mm_add_ps(vgbsum,vgb);
-            vvdwsum          = _mm_add_ps(vvdwsum,vvdw);
-
-            fscal            = _mm_add_ps(felec,fvdw);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_ps(fscal,dx00);
-            ty               = _mm_mul_ps(fscal,dy00);
-            tz               = _mm_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_ps(fix0,tx);
-            fiy0             = _mm_add_ps(fiy0,ty);
-            fiz0             = _mm_add_ps(fiz0,tz);
-
-            fjptrA             = f+j_coord_offsetA;
-            fjptrB             = f+j_coord_offsetB;
-            fjptrC             = f+j_coord_offsetC;
-            fjptrD             = f+j_coord_offsetD;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-            
-            /* Inner loop uses 92 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrlistA         = jjnr[jidx];
-            jnrlistB         = jjnr[jidx+1];
-            jnrlistC         = jjnr[jidx+2];
-            jnrlistD         = jjnr[jidx+3];
-            /* Sign of each element will be negative for non-real atoms.
-             * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
-             * so use it as val = _mm_andnot_ps(mask,val) to clear dummy entries.
-             */
-            dummy_mask = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
-            jnrA       = (jnrlistA>=0) ? jnrlistA : 0;
-            jnrB       = (jnrlistB>=0) ? jnrlistB : 0;
-            jnrC       = (jnrlistC>=0) ? jnrlistC : 0;
-            jnrD       = (jnrlistD>=0) ? jnrlistD : 0;
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = sse2_invsqrt_f(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-            r00              = _mm_andnot_ps(dummy_mask,r00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-            gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,
-                                         vdwparam+vdwioffset0+vdwjidx0C,
-                                         vdwparam+vdwioffset0+vdwjidx0D,
-                                         &c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _mm_mul_ps(r00,vftabscale);
-            vfitab           = _mm_cvttps_epi32(rt);
-            vfeps            = _mm_sub_ps(rt,_mm_cvtepi32_ps(vfitab));
-            vfitab           = _mm_slli_epi32(vfitab,3);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-            gbeps            = _mm_sub_ps(rt,_mm_cvtepi32_ps(gbitab));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(gbeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp));
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
-            dvdatmp          = _mm_andnot_ps(dummy_mask,dvdatmp);
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,1) );
-            G                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,2) );
-            H                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(vfeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(vfeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(vfeps,Fp));
-            vvdw6            = _mm_mul_ps(c6_00,VV);
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(vfeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fvdw6            = _mm_mul_ps(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            vfitab           = _mm_add_epi32(vfitab,ifour);
-            Y                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,1) );
-            G                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,2) );
-            H                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(vfeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(vfeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(vfeps,Fp));
-            vvdw12           = _mm_mul_ps(c12_00,VV);
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(vfeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fvdw12           = _mm_mul_ps(c12_00,FF);
-            vvdw             = _mm_add_ps(vvdw12,vvdw6);
-            fvdw             = _mm_xor_ps(signbit,_mm_mul_ps(_mm_add_ps(fvdw6,fvdw12),_mm_mul_ps(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _mm_andnot_ps(dummy_mask,velec);
-            velecsum         = _mm_add_ps(velecsum,velec);
-            vgb              = _mm_andnot_ps(dummy_mask,vgb);
-            vgbsum           = _mm_add_ps(vgbsum,vgb);
-            vvdw             = _mm_andnot_ps(dummy_mask,vvdw);
-            vvdwsum          = _mm_add_ps(vvdwsum,vvdw);
-
-            fscal            = _mm_add_ps(felec,fvdw);
-
-            fscal            = _mm_andnot_ps(dummy_mask,fscal);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_ps(fscal,dx00);
-            ty               = _mm_mul_ps(fscal,dy00);
-            tz               = _mm_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_ps(fix0,tx);
-            fiy0             = _mm_add_ps(fiy0,ty);
-            fiz0             = _mm_add_ps(fiz0,tz);
-
-            fjptrA             = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
-            fjptrB             = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
-            fjptrC             = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
-            fjptrD             = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-            
-            /* Inner loop uses 93 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_mm_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_mm_update_1pot_ps(vgbsum,kernel_data->energygrp_polarization+ggid);
-        gmx_mm_update_1pot_ps(vvdwsum,kernel_data->energygrp_vdw+ggid);
-        dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0));
-        gmx_mm_update_1pot_ps(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 10 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*93);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse2_single
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse2_single
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or 
-     * just 0 for non-waters.
-     * Suffixes A,B,C,D refer to j loop unrolling done with SSE, e.g. for the four different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB,jnrC,jnrD;
-    int              jnrlistA,jnrlistB,jnrlistC,jnrlistD;
-    int              j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    real             *fjptrA,*fjptrB,*fjptrC,*fjptrD;
-    real             scratch[4*DIM];
-    __m128           tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128           ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D;
-    __m128           jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128           dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128           velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128           vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp;
-    __m128           minushalf = _mm_set1_ps(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    __m128           rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    __m128           one_sixth   = _mm_set1_ps(1.0/6.0);
-    __m128           one_twelfth = _mm_set1_ps(1.0/12.0);
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128           rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m128           dummy_mask,cutoff_mask;
-    __m128           signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) );
-    __m128           one     = _mm_set1_ps(1.0);
-    __m128           two     = _mm_set1_ps(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_ps(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = _mm_set1_ps(kernel_data->table_vdw->scale);
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_ps(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = jnrC = jnrD = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-    j_coord_offsetC = 0;
-    j_coord_offsetD = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    for(iidx=0;iidx<4*DIM;iidx++)
-    {
-        scratch[iidx] = 0.0;
-    }  
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_ps(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-        
-        fix0             = _mm_setzero_ps();
-        fiy0             = _mm_setzero_ps();
-        fiz0             = _mm_setzero_ps();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_ps(facel,_mm_load1_ps(charge+inr+0));
-        isai0            = _mm_load1_ps(invsqrta+inr+0);
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        dvdasum          = _mm_setzero_ps();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end && jjnr[jidx+3]>=0; jidx+=4)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            jnrC             = jjnr[jidx+2];
-            jnrD             = jjnr[jidx+3];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = sse2_invsqrt_f(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-            gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,
-                                         vdwparam+vdwioffset0+vdwjidx0C,
-                                         vdwparam+vdwioffset0+vdwjidx0D,
-                                         &c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _mm_mul_ps(r00,vftabscale);
-            vfitab           = _mm_cvttps_epi32(rt);
-            vfeps            = _mm_sub_ps(rt,_mm_cvtepi32_ps(vfitab));
-            vfitab           = _mm_slli_epi32(vfitab,3);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-            gbeps            = _mm_sub_ps(rt,_mm_cvtepi32_ps(gbitab));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(gbeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp));
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,1) );
-            G                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,2) );
-            H                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(vfeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(vfeps,_mm_add_ps(G,Heps)));
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(vfeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fvdw6            = _mm_mul_ps(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            vfitab           = _mm_add_epi32(vfitab,ifour);
-            Y                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,1) );
-            G                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,2) );
-            H                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(vfeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(vfeps,_mm_add_ps(G,Heps)));
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(vfeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fvdw12           = _mm_mul_ps(c12_00,FF);
-            fvdw             = _mm_xor_ps(signbit,_mm_mul_ps(_mm_add_ps(fvdw6,fvdw12),_mm_mul_ps(vftabscale,rinv00)));
-
-            fscal            = _mm_add_ps(felec,fvdw);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_ps(fscal,dx00);
-            ty               = _mm_mul_ps(fscal,dy00);
-            tz               = _mm_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_ps(fix0,tx);
-            fiy0             = _mm_add_ps(fiy0,ty);
-            fiz0             = _mm_add_ps(fiz0,tz);
-
-            fjptrA             = f+j_coord_offsetA;
-            fjptrB             = f+j_coord_offsetB;
-            fjptrC             = f+j_coord_offsetC;
-            fjptrD             = f+j_coord_offsetD;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-            
-            /* Inner loop uses 82 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrlistA         = jjnr[jidx];
-            jnrlistB         = jjnr[jidx+1];
-            jnrlistC         = jjnr[jidx+2];
-            jnrlistD         = jjnr[jidx+3];
-            /* Sign of each element will be negative for non-real atoms.
-             * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
-             * so use it as val = _mm_andnot_ps(mask,val) to clear dummy entries.
-             */
-            dummy_mask = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
-            jnrA       = (jnrlistA>=0) ? jnrlistA : 0;
-            jnrB       = (jnrlistB>=0) ? jnrlistB : 0;
-            jnrC       = (jnrlistC>=0) ? jnrlistC : 0;
-            jnrD       = (jnrlistD>=0) ? jnrlistD : 0;
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = sse2_invsqrt_f(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-            r00              = _mm_andnot_ps(dummy_mask,r00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-            gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,
-                                         vdwparam+vdwioffset0+vdwjidx0C,
-                                         vdwparam+vdwioffset0+vdwjidx0D,
-                                         &c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _mm_mul_ps(r00,vftabscale);
-            vfitab           = _mm_cvttps_epi32(rt);
-            vfeps            = _mm_sub_ps(rt,_mm_cvtepi32_ps(vfitab));
-            vfitab           = _mm_slli_epi32(vfitab,3);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-            gbeps            = _mm_sub_ps(rt,_mm_cvtepi32_ps(gbitab));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(gbeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp));
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
-            dvdatmp          = _mm_andnot_ps(dummy_mask,dvdatmp);
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,1) );
-            G                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,2) );
-            H                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(vfeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(vfeps,_mm_add_ps(G,Heps)));
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(vfeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fvdw6            = _mm_mul_ps(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            vfitab           = _mm_add_epi32(vfitab,ifour);
-            Y                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,1) );
-            G                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,2) );
-            H                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(vfeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(vfeps,_mm_add_ps(G,Heps)));
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(vfeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fvdw12           = _mm_mul_ps(c12_00,FF);
-            fvdw             = _mm_xor_ps(signbit,_mm_mul_ps(_mm_add_ps(fvdw6,fvdw12),_mm_mul_ps(vftabscale,rinv00)));
-
-            fscal            = _mm_add_ps(felec,fvdw);
-
-            fscal            = _mm_andnot_ps(dummy_mask,fscal);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_ps(fscal,dx00);
-            ty               = _mm_mul_ps(fscal,dy00);
-            tz               = _mm_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_ps(fix0,tx);
-            fiy0             = _mm_add_ps(fiy0,ty);
-            fiz0             = _mm_add_ps(fiz0,tz);
-
-            fjptrA             = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
-            fjptrB             = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
-            fjptrC             = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
-            fjptrD             = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-            
-            /* Inner loop uses 83 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0));
-        gmx_mm_update_1pot_ps(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*83);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sse2_single.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sse2_single.c
deleted file mode 100644
index 137dd21fa8..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sse2_single.c
+++ /dev/null
@@ -1,856 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sse2_single kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_x86_sse2_single.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse2_single
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse2_single
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or 
-     * just 0 for non-waters.
-     * Suffixes A,B,C,D refer to j loop unrolling done with SSE, e.g. for the four different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB,jnrC,jnrD;
-    int              jnrlistA,jnrlistB,jnrlistC,jnrlistD;
-    int              j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    real             *fjptrA,*fjptrB,*fjptrC,*fjptrD;
-    real             scratch[4*DIM];
-    __m128           tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128           ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D;
-    __m128           jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128           dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128           velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128           vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp;
-    __m128           minushalf = _mm_set1_ps(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    __m128           rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    __m128           one_sixth   = _mm_set1_ps(1.0/6.0);
-    __m128           one_twelfth = _mm_set1_ps(1.0/12.0);
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128           rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m128           dummy_mask,cutoff_mask;
-    __m128           signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) );
-    __m128           one     = _mm_set1_ps(1.0);
-    __m128           two     = _mm_set1_ps(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_ps(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_ps(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = jnrC = jnrD = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-    j_coord_offsetC = 0;
-    j_coord_offsetD = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    for(iidx=0;iidx<4*DIM;iidx++)
-    {
-        scratch[iidx] = 0.0;
-    }  
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_ps(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-        
-        fix0             = _mm_setzero_ps();
-        fiy0             = _mm_setzero_ps();
-        fiz0             = _mm_setzero_ps();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_ps(facel,_mm_load1_ps(charge+inr+0));
-        isai0            = _mm_load1_ps(invsqrta+inr+0);
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _mm_setzero_ps();
-        vgbsum           = _mm_setzero_ps();
-        vvdwsum          = _mm_setzero_ps();
-        dvdasum          = _mm_setzero_ps();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end && jjnr[jidx+3]>=0; jidx+=4)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            jnrC             = jjnr[jidx+2];
-            jnrD             = jjnr[jidx+3];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = sse2_invsqrt_f(rsq00);
-
-            rinvsq00         = _mm_mul_ps(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-            gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,
-                                         vdwparam+vdwioffset0+vdwjidx0C,
-                                         vdwparam+vdwioffset0+vdwjidx0D,
-                                         &c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-            gbeps            = _mm_sub_ps(rt,_mm_cvtepi32_ps(gbitab));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(gbeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp));
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _mm_mul_ps(_mm_mul_ps(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _mm_mul_ps(c6_00,rinvsix);
-            vvdw12           = _mm_mul_ps(c12_00,_mm_mul_ps(rinvsix,rinvsix));
-            vvdw             = _mm_sub_ps( _mm_mul_ps(vvdw12,one_twelfth) , _mm_mul_ps(vvdw6,one_sixth) );
-            fvdw             = _mm_mul_ps(_mm_sub_ps(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _mm_add_ps(velecsum,velec);
-            vgbsum           = _mm_add_ps(vgbsum,vgb);
-            vvdwsum          = _mm_add_ps(vvdwsum,vvdw);
-
-            fscal            = _mm_add_ps(felec,fvdw);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_ps(fscal,dx00);
-            ty               = _mm_mul_ps(fscal,dy00);
-            tz               = _mm_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_ps(fix0,tx);
-            fiy0             = _mm_add_ps(fiy0,ty);
-            fiz0             = _mm_add_ps(fiz0,tz);
-
-            fjptrA             = f+j_coord_offsetA;
-            fjptrB             = f+j_coord_offsetB;
-            fjptrC             = f+j_coord_offsetC;
-            fjptrD             = f+j_coord_offsetD;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-            
-            /* Inner loop uses 71 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrlistA         = jjnr[jidx];
-            jnrlistB         = jjnr[jidx+1];
-            jnrlistC         = jjnr[jidx+2];
-            jnrlistD         = jjnr[jidx+3];
-            /* Sign of each element will be negative for non-real atoms.
-             * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
-             * so use it as val = _mm_andnot_ps(mask,val) to clear dummy entries.
-             */
-            dummy_mask = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
-            jnrA       = (jnrlistA>=0) ? jnrlistA : 0;
-            jnrB       = (jnrlistB>=0) ? jnrlistB : 0;
-            jnrC       = (jnrlistC>=0) ? jnrlistC : 0;
-            jnrD       = (jnrlistD>=0) ? jnrlistD : 0;
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = sse2_invsqrt_f(rsq00);
-
-            rinvsq00         = _mm_mul_ps(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-            r00              = _mm_andnot_ps(dummy_mask,r00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-            gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,
-                                         vdwparam+vdwioffset0+vdwjidx0C,
-                                         vdwparam+vdwioffset0+vdwjidx0D,
-                                         &c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-            gbeps            = _mm_sub_ps(rt,_mm_cvtepi32_ps(gbitab));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(gbeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp));
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
-            dvdatmp          = _mm_andnot_ps(dummy_mask,dvdatmp);
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _mm_mul_ps(_mm_mul_ps(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _mm_mul_ps(c6_00,rinvsix);
-            vvdw12           = _mm_mul_ps(c12_00,_mm_mul_ps(rinvsix,rinvsix));
-            vvdw             = _mm_sub_ps( _mm_mul_ps(vvdw12,one_twelfth) , _mm_mul_ps(vvdw6,one_sixth) );
-            fvdw             = _mm_mul_ps(_mm_sub_ps(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _mm_andnot_ps(dummy_mask,velec);
-            velecsum         = _mm_add_ps(velecsum,velec);
-            vgb              = _mm_andnot_ps(dummy_mask,vgb);
-            vgbsum           = _mm_add_ps(vgbsum,vgb);
-            vvdw             = _mm_andnot_ps(dummy_mask,vvdw);
-            vvdwsum          = _mm_add_ps(vvdwsum,vvdw);
-
-            fscal            = _mm_add_ps(felec,fvdw);
-
-            fscal            = _mm_andnot_ps(dummy_mask,fscal);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_ps(fscal,dx00);
-            ty               = _mm_mul_ps(fscal,dy00);
-            tz               = _mm_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_ps(fix0,tx);
-            fiy0             = _mm_add_ps(fiy0,ty);
-            fiz0             = _mm_add_ps(fiz0,tz);
-
-            fjptrA             = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
-            fjptrB             = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
-            fjptrC             = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
-            fjptrD             = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-            
-            /* Inner loop uses 72 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_mm_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_mm_update_1pot_ps(vgbsum,kernel_data->energygrp_polarization+ggid);
-        gmx_mm_update_1pot_ps(vvdwsum,kernel_data->energygrp_vdw+ggid);
-        dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0));
-        gmx_mm_update_1pot_ps(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 10 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*72);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse2_single
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse2_single
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or 
-     * just 0 for non-waters.
-     * Suffixes A,B,C,D refer to j loop unrolling done with SSE, e.g. for the four different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB,jnrC,jnrD;
-    int              jnrlistA,jnrlistB,jnrlistC,jnrlistD;
-    int              j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    real             *fjptrA,*fjptrB,*fjptrC,*fjptrD;
-    real             scratch[4*DIM];
-    __m128           tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128           ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D;
-    __m128           jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128           dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128           velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128           vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp;
-    __m128           minushalf = _mm_set1_ps(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    __m128           rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    __m128           one_sixth   = _mm_set1_ps(1.0/6.0);
-    __m128           one_twelfth = _mm_set1_ps(1.0/12.0);
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128           rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m128           dummy_mask,cutoff_mask;
-    __m128           signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) );
-    __m128           one     = _mm_set1_ps(1.0);
-    __m128           two     = _mm_set1_ps(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_ps(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_ps(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = jnrC = jnrD = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-    j_coord_offsetC = 0;
-    j_coord_offsetD = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    for(iidx=0;iidx<4*DIM;iidx++)
-    {
-        scratch[iidx] = 0.0;
-    }  
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_ps(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-        
-        fix0             = _mm_setzero_ps();
-        fiy0             = _mm_setzero_ps();
-        fiz0             = _mm_setzero_ps();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_ps(facel,_mm_load1_ps(charge+inr+0));
-        isai0            = _mm_load1_ps(invsqrta+inr+0);
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        dvdasum          = _mm_setzero_ps();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end && jjnr[jidx+3]>=0; jidx+=4)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            jnrC             = jjnr[jidx+2];
-            jnrD             = jjnr[jidx+3];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = sse2_invsqrt_f(rsq00);
-
-            rinvsq00         = _mm_mul_ps(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-            gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,
-                                         vdwparam+vdwioffset0+vdwjidx0C,
-                                         vdwparam+vdwioffset0+vdwjidx0D,
-                                         &c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-            gbeps            = _mm_sub_ps(rt,_mm_cvtepi32_ps(gbitab));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(gbeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp));
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _mm_mul_ps(_mm_mul_ps(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(c12_00,rinvsix),c6_00),_mm_mul_ps(rinvsix,rinvsq00));
-
-            fscal            = _mm_add_ps(felec,fvdw);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_ps(fscal,dx00);
-            ty               = _mm_mul_ps(fscal,dy00);
-            tz               = _mm_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_ps(fix0,tx);
-            fiy0             = _mm_add_ps(fiy0,ty);
-            fiz0             = _mm_add_ps(fiz0,tz);
-
-            fjptrA             = f+j_coord_offsetA;
-            fjptrB             = f+j_coord_offsetB;
-            fjptrC             = f+j_coord_offsetC;
-            fjptrD             = f+j_coord_offsetD;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-            
-            /* Inner loop uses 64 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrlistA         = jjnr[jidx];
-            jnrlistB         = jjnr[jidx+1];
-            jnrlistC         = jjnr[jidx+2];
-            jnrlistD         = jjnr[jidx+3];
-            /* Sign of each element will be negative for non-real atoms.
-             * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
-             * so use it as val = _mm_andnot_ps(mask,val) to clear dummy entries.
-             */
-            dummy_mask = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
-            jnrA       = (jnrlistA>=0) ? jnrlistA : 0;
-            jnrB       = (jnrlistB>=0) ? jnrlistB : 0;
-            jnrC       = (jnrlistC>=0) ? jnrlistC : 0;
-            jnrD       = (jnrlistD>=0) ? jnrlistD : 0;
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = sse2_invsqrt_f(rsq00);
-
-            rinvsq00         = _mm_mul_ps(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-            r00              = _mm_andnot_ps(dummy_mask,r00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-            gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,
-                                         vdwparam+vdwioffset0+vdwjidx0C,
-                                         vdwparam+vdwioffset0+vdwjidx0D,
-                                         &c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-            gbeps            = _mm_sub_ps(rt,_mm_cvtepi32_ps(gbitab));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(gbeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp));
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
-            dvdatmp          = _mm_andnot_ps(dummy_mask,dvdatmp);
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _mm_mul_ps(_mm_mul_ps(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(c12_00,rinvsix),c6_00),_mm_mul_ps(rinvsix,rinvsq00));
-
-            fscal            = _mm_add_ps(felec,fvdw);
-
-            fscal            = _mm_andnot_ps(dummy_mask,fscal);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_ps(fscal,dx00);
-            ty               = _mm_mul_ps(fscal,dy00);
-            tz               = _mm_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_ps(fix0,tx);
-            fiy0             = _mm_add_ps(fiy0,ty);
-            fiz0             = _mm_add_ps(fiz0,tz);
-
-            fjptrA             = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
-            fjptrB             = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
-            fjptrC             = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
-            fjptrD             = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-            
-            /* Inner loop uses 65 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0));
-        gmx_mm_update_1pot_ps(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*65);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_sse2_single.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_sse2_single.c
deleted file mode 100644
index 36268cd47e..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_sse2_single.c
+++ /dev/null
@@ -1,761 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sse2_single kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_x86_sse2_single.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse2_single
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse2_single
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or 
-     * just 0 for non-waters.
-     * Suffixes A,B,C,D refer to j loop unrolling done with SSE, e.g. for the four different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB,jnrC,jnrD;
-    int              jnrlistA,jnrlistB,jnrlistC,jnrlistD;
-    int              j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    real             *fjptrA,*fjptrB,*fjptrC,*fjptrD;
-    real             scratch[4*DIM];
-    __m128           tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128           ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D;
-    __m128           jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128           dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128           velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128           vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp;
-    __m128           minushalf = _mm_set1_ps(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128           rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m128           dummy_mask,cutoff_mask;
-    __m128           signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) );
-    __m128           one     = _mm_set1_ps(1.0);
-    __m128           two     = _mm_set1_ps(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_ps(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_ps(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = jnrC = jnrD = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-    j_coord_offsetC = 0;
-    j_coord_offsetD = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    for(iidx=0;iidx<4*DIM;iidx++)
-    {
-        scratch[iidx] = 0.0;
-    }  
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_ps(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-        
-        fix0             = _mm_setzero_ps();
-        fiy0             = _mm_setzero_ps();
-        fiz0             = _mm_setzero_ps();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_ps(facel,_mm_load1_ps(charge+inr+0));
-        isai0            = _mm_load1_ps(invsqrta+inr+0);
-
-        /* Reset potential sums */
-        velecsum         = _mm_setzero_ps();
-        vgbsum           = _mm_setzero_ps();
-        dvdasum          = _mm_setzero_ps();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end && jjnr[jidx+3]>=0; jidx+=4)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            jnrC             = jjnr[jidx+2];
-            jnrD             = jjnr[jidx+3];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = sse2_invsqrt_f(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-            gbeps            = _mm_sub_ps(rt,_mm_cvtepi32_ps(gbitab));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(gbeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp));
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _mm_add_ps(velecsum,velec);
-            vgbsum           = _mm_add_ps(vgbsum,vgb);
-
-            fscal            = felec;
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_ps(fscal,dx00);
-            ty               = _mm_mul_ps(fscal,dy00);
-            tz               = _mm_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_ps(fix0,tx);
-            fiy0             = _mm_add_ps(fiy0,ty);
-            fiz0             = _mm_add_ps(fiz0,tz);
-
-            fjptrA             = f+j_coord_offsetA;
-            fjptrB             = f+j_coord_offsetB;
-            fjptrC             = f+j_coord_offsetC;
-            fjptrD             = f+j_coord_offsetD;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-            
-            /* Inner loop uses 58 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrlistA         = jjnr[jidx];
-            jnrlistB         = jjnr[jidx+1];
-            jnrlistC         = jjnr[jidx+2];
-            jnrlistD         = jjnr[jidx+3];
-            /* Sign of each element will be negative for non-real atoms.
-             * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
-             * so use it as val = _mm_andnot_ps(mask,val) to clear dummy entries.
-             */
-            dummy_mask = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
-            jnrA       = (jnrlistA>=0) ? jnrlistA : 0;
-            jnrB       = (jnrlistB>=0) ? jnrlistB : 0;
-            jnrC       = (jnrlistC>=0) ? jnrlistC : 0;
-            jnrD       = (jnrlistD>=0) ? jnrlistD : 0;
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = sse2_invsqrt_f(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-            r00              = _mm_andnot_ps(dummy_mask,r00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-            gbeps            = _mm_sub_ps(rt,_mm_cvtepi32_ps(gbitab));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(gbeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp));
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
-            dvdatmp          = _mm_andnot_ps(dummy_mask,dvdatmp);
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _mm_andnot_ps(dummy_mask,velec);
-            velecsum         = _mm_add_ps(velecsum,velec);
-            vgb              = _mm_andnot_ps(dummy_mask,vgb);
-            vgbsum           = _mm_add_ps(vgbsum,vgb);
-
-            fscal            = felec;
-
-            fscal            = _mm_andnot_ps(dummy_mask,fscal);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_ps(fscal,dx00);
-            ty               = _mm_mul_ps(fscal,dy00);
-            tz               = _mm_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_ps(fix0,tx);
-            fiy0             = _mm_add_ps(fiy0,ty);
-            fiz0             = _mm_add_ps(fiz0,tz);
-
-            fjptrA             = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
-            fjptrB             = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
-            fjptrC             = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
-            fjptrD             = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-            
-            /* Inner loop uses 59 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_mm_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_mm_update_1pot_ps(vgbsum,kernel_data->energygrp_polarization+ggid);
-        dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0));
-        gmx_mm_update_1pot_ps(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 9 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*9 + inneriter*59);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse2_single
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse2_single
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or 
-     * just 0 for non-waters.
-     * Suffixes A,B,C,D refer to j loop unrolling done with SSE, e.g. for the four different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB,jnrC,jnrD;
-    int              jnrlistA,jnrlistB,jnrlistC,jnrlistD;
-    int              j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    real             *fjptrA,*fjptrB,*fjptrC,*fjptrD;
-    real             scratch[4*DIM];
-    __m128           tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128           ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D;
-    __m128           jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128           dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128           velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128           vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp;
-    __m128           minushalf = _mm_set1_ps(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128           rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m128           dummy_mask,cutoff_mask;
-    __m128           signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) );
-    __m128           one     = _mm_set1_ps(1.0);
-    __m128           two     = _mm_set1_ps(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_ps(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_ps(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = jnrC = jnrD = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-    j_coord_offsetC = 0;
-    j_coord_offsetD = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    for(iidx=0;iidx<4*DIM;iidx++)
-    {
-        scratch[iidx] = 0.0;
-    }  
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_ps(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-        
-        fix0             = _mm_setzero_ps();
-        fiy0             = _mm_setzero_ps();
-        fiz0             = _mm_setzero_ps();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_ps(facel,_mm_load1_ps(charge+inr+0));
-        isai0            = _mm_load1_ps(invsqrta+inr+0);
-
-        dvdasum          = _mm_setzero_ps();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end && jjnr[jidx+3]>=0; jidx+=4)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            jnrC             = jjnr[jidx+2];
-            jnrD             = jjnr[jidx+3];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = sse2_invsqrt_f(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-            gbeps            = _mm_sub_ps(rt,_mm_cvtepi32_ps(gbitab));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(gbeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp));
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00);
-
-            fscal            = felec;
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_ps(fscal,dx00);
-            ty               = _mm_mul_ps(fscal,dy00);
-            tz               = _mm_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_ps(fix0,tx);
-            fiy0             = _mm_add_ps(fiy0,ty);
-            fiz0             = _mm_add_ps(fiz0,tz);
-
-            fjptrA             = f+j_coord_offsetA;
-            fjptrB             = f+j_coord_offsetB;
-            fjptrC             = f+j_coord_offsetC;
-            fjptrD             = f+j_coord_offsetD;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-            
-            /* Inner loop uses 56 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrlistA         = jjnr[jidx];
-            jnrlistB         = jjnr[jidx+1];
-            jnrlistC         = jjnr[jidx+2];
-            jnrlistD         = jjnr[jidx+3];
-            /* Sign of each element will be negative for non-real atoms.
-             * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
-             * so use it as val = _mm_andnot_ps(mask,val) to clear dummy entries.
-             */
-            dummy_mask = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
-            jnrA       = (jnrlistA>=0) ? jnrlistA : 0;
-            jnrB       = (jnrlistB>=0) ? jnrlistB : 0;
-            jnrC       = (jnrlistC>=0) ? jnrlistC : 0;
-            jnrD       = (jnrlistD>=0) ? jnrlistD : 0;
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = sse2_invsqrt_f(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-            r00              = _mm_andnot_ps(dummy_mask,r00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-            gbeps            = _mm_sub_ps(rt,_mm_cvtepi32_ps(gbitab));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(gbeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp));
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
-            dvdatmp          = _mm_andnot_ps(dummy_mask,dvdatmp);
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00);
-
-            fscal            = felec;
-
-            fscal            = _mm_andnot_ps(dummy_mask,fscal);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_ps(fscal,dx00);
-            ty               = _mm_mul_ps(fscal,dy00);
-            tz               = _mm_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_ps(fix0,tx);
-            fiy0             = _mm_add_ps(fiy0,ty);
-            fiz0             = _mm_add_ps(fiz0,tz);
-
-            fjptrA             = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
-            fjptrB             = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
-            fjptrC             = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
-            fjptrD             = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-            
-            /* Inner loop uses 57 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0));
-        gmx_mm_update_1pot_ps(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*7 + inneriter*57);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_sse2_single.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_sse2_single.c
index 324c1cee56..7ecc4a33da 100644
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_sse2_single.c
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_sse2_single.c
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -51,6 +51,36 @@ nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sse2_single;
 nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sse2_single;
 nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sse2_single;
 nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sse2_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse2_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse2_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse2_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse2_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse2_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse2_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse2_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse2_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse2_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse2_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse2_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse2_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse2_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse2_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse2_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse2_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse2_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse2_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse2_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse2_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse2_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse2_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse2_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse2_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse2_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse2_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse2_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse2_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse2_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse2_single;
 nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sse2_single;
 nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sse2_single;
 nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sse2_single;
@@ -141,72 +171,6 @@ nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sse2_single;
 nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sse2_single;
 nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sse2_single;
 nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sse2_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse2_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse2_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse2_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse2_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse2_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse2_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse2_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse2_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse2_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse2_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse2_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse2_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse2_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse2_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse2_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse2_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse2_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse2_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse2_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse2_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse2_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse2_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse2_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse2_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse2_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse2_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse2_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse2_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse2_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse2_single;
-nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse2_single;
-nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse2_single;
-nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse2_single;
-nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse2_single;
 nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sse2_single;
 nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sse2_single;
 nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sse2_single;
@@ -277,6 +241,36 @@ nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sse2_single;
 nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sse2_single;
 nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sse2_single;
 nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse2_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse2_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse2_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse2_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse2_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse2_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse2_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse2_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse2_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse2_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse2_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse2_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse2_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse2_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse2_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse2_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse2_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse2_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse2_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse2_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse2_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse2_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse2_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse2_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse2_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse2_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse2_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse2_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse2_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse2_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse2_single;
 
 
 nb_kernel_info_t
@@ -294,6 +288,36 @@ nb_kernel_info_t
     { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sse2_single, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sse2_single", "sse2_single", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sse2_single, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sse2_single", "sse2_single", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
     { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sse2_single, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sse2_single", "sse2_single", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
     { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sse2_single, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sse2_single", "sse2_single", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "PotentialAndForce" },
     { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sse2_single, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sse2_single", "sse2_single", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sse2_single, "nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sse2_single", "sse2_single", "Ewald", "None", "LJEwald", "None", "Water3Particle", "", "PotentialAndForce" },
@@ -384,72 +408,6 @@ nb_kernel_info_t
     { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sse2_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sse2_single", "sse2_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "Force" },
     { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sse2_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sse2_single", "sse2_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "PotentialAndForce" },
     { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sse2_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sse2_single", "sse2_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse2_single, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse2_single", "sse2_single", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse2_single, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse2_single", "sse2_single", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse2_single, "nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse2_single", "sse2_single", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse2_single, "nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse2_single", "sse2_single", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse2_single, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse2_single", "sse2_single", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse2_single, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse2_single", "sse2_single", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sse2_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
     { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sse2_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sse2_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
@@ -519,7 +477,37 @@ nb_kernel_info_t
     { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sse2_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sse2_single", "sse2_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
     { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sse2_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sse2_single", "sse2_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
     { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sse2_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sse2_single", "sse2_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse2_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse2_single", "sse2_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }
+    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse2_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse2_single", "sse2_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }
 };
 
 int
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_template_sse2_single.pre b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_template_sse2_single.pre
index 2ee376dcac..556215e2df 100644
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_template_sse2_single.pre
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_template_sse2_single.pre
@@ -2,7 +2,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -131,12 +131,6 @@ void
     __m128           velec,felec,velecsum,facel,crf,krf,krf2;
     real             *charge;
     /* #endif */
-    /* #if 'GeneralizedBorn' in KERNEL_ELEC */
-    __m128i          gbitab;
-    __m128           vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp;
-    __m128           minushalf = _mm_set1_ps(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    /* #endif */
     /* #if KERNEL_VDW != 'None' */
     int              nvdwtype;
     __m128           rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
@@ -145,7 +139,7 @@ void
     __m128           one_sixth   = _mm_set1_ps(1.0/6.0);
     __m128           one_twelfth = _mm_set1_ps(1.0/12.0);
     /* #endif */
-    /* #if 'Table' in KERNEL_ELEC or 'GeneralizedBorn' in KERNEL_ELEC or 'Table' in KERNEL_VDW */
+    /* #if 'Table' in KERNEL_ELEC or 'Table' in KERNEL_VDW */
     __m128i          vfitab;
     __m128i          ifour       = _mm_set1_epi32(4);
     __m128           rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
@@ -229,14 +223,6 @@ void
      /*     #endif */
     /* #endif */
 
-    /* #if KERNEL_ELEC=='GeneralizedBorn' */
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_ps(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-    /* #endif */
-
     /* #if 'Water' in GEOMETRY_I */
     /* Setup water-specific parameters */
     inr              = nlist->iinr[0];
@@ -370,9 +356,6 @@ void
         /*     #for I in PARTICLES_ELEC_I */
         iq{I}              = _mm_mul_ps(facel,_mm_load1_ps(charge+inr+{I}));
         /*         #define OUTERFLOPS OUTERFLOPS+1 */
-        /*         #if KERNEL_ELEC=='GeneralizedBorn' */
-        isai{I}            = _mm_load1_ps(invsqrta+inr+{I});
-        /*         #endif */
         /*     #endfor */
         /*     #for I in PARTICLES_VDW_I */
         vdwioffset{I}      = 2*nvdwtype*vdwtype[inr+{I}];
@@ -384,16 +367,10 @@ void
         /*     #if KERNEL_ELEC != 'None' */
         velecsum         = _mm_setzero_ps();
         /*     #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC */
-        vgbsum           = _mm_setzero_ps();
-        /*     #endif */
         /*     #if KERNEL_VDW != 'None' */
         vvdwsum          = _mm_setzero_ps();
         /*     #endif */
         /* #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */
-        dvdasum          = _mm_setzero_ps();
-        /*     #endif */
 
         /* #for ROUND in ['Loop','Epilogue'] */
 
@@ -498,10 +475,6 @@ void
             /*     #for J in PARTICLES_ELEC_J */
             jq{J}              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+{J},charge+jnrB+{J},
                                                               charge+jnrC+{J},charge+jnrD+{J});
-            /*         #if KERNEL_ELEC=='GeneralizedBorn' */
-            isaj{J}            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+{J},invsqrta+jnrB+{J},
-                                                              invsqrta+jnrC+{J},invsqrta+jnrD+{J});
-            /*         #endif */
             /*     #endfor */
             /*     #for J in PARTICLES_VDW_J */
             vdwjidx{J}A        = 2*vdwtype[jnrA+{J}];
@@ -610,63 +583,6 @@ void
             /*                 #define INNERFLOPS INNERFLOPS+3 */
             /*             #endif */
 
-            /*         #elif KERNEL_ELEC=='GeneralizedBorn' */
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai{I},isaj{J});
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq{I}{J},_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-            /*             #define INNERFLOPS INNERFLOPS+5 */
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r{I}{J},gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-            gbeps            = _mm_sub_ps(rt,_mm_cvtepi32_ps(gbitab));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(gbeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp));
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-            /*             #define INNERFLOPS INNERFLOPS+10 */
-
-            /*             #if 'Force' in KERNEL_VF */
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r{I}{J})));
-            /*                 #if ROUND == 'Epilogue' */
-            dvdatmp          = _mm_andnot_ps(dummy_mask,dvdatmp);
-            /*                 #endif */
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            /*                 #if ROUND == 'Loop' */
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            /*                 #else */
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            /*                 #endif */
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj{J},isaj{J})));
-            /*                 #define INNERFLOPS INNERFLOPS+13 */
-            /*             #endif */
-            velec            = _mm_mul_ps(qq{I}{J},rinv{I}{J});
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #if 'Force' in KERNEL_VF */
-            felec            = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv{I}{J}),fgb),rinv{I}{J});
-            /*                 #define INNERFLOPS INNERFLOPS+3 */
-            /*             #endif */
-
             /*         #elif KERNEL_ELEC=='Ewald' */
             /* EWALD ELECTROSTATICS */
 
@@ -913,17 +829,6 @@ void
             /*             #endif */
             velecsum         = _mm_add_ps(velecsum,velec);
             /*             #define INNERFLOPS INNERFLOPS+1 */
-            /*             #if KERNEL_ELEC=='GeneralizedBorn' */
-            /*             #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */
-            vgb              = _mm_and_ps(vgb,cutoff_mask);
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #endif                                       */
-            /*             #if ROUND == 'Epilogue' */
-            vgb              = _mm_andnot_ps(dummy_mask,vgb);
-            /*             #endif */
-            vgbsum           = _mm_add_ps(vgbsum,vgb);
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #endif */
             /*         #endif */
             /*         #if 'vdw' in INTERACTION_FLAGS[I][J] */
             /*     ## Note special check for TIP4P-TIP4P. Since we are cutting of all hydrogen interactions we also cut the LJ-only O-O interaction */
@@ -1075,19 +980,11 @@ void
         gmx_mm_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid);
         /*         #define OUTERFLOPS OUTERFLOPS+1 */
         /*     #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC */
-        gmx_mm_update_1pot_ps(vgbsum,kernel_data->energygrp_polarization+ggid);
-        /*         #define OUTERFLOPS OUTERFLOPS+1 */
-        /*     #endif */
         /*     #if KERNEL_VDW != 'None' */
         gmx_mm_update_1pot_ps(vvdwsum,kernel_data->energygrp_vdw+ggid);
         /*         #define OUTERFLOPS OUTERFLOPS+1 */
         /*     #endif */
         /* #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */
-        dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai{I},isai{I}));
-        gmx_mm_update_1pot_ps(dvdasum,dvda+inr);
-        /*     #endif */
 
         /* Increment number of inner iterations */
         inneriter                  += j_index_end - j_index_start;
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/make_nb_kernel_sse4_1_double.py b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/make_nb_kernel_sse4_1_double.py
index ed5ac7fcae..d24ea9b0bf 100755
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/make_nb_kernel_sse4_1_double.py
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/make_nb_kernel_sse4_1_double.py
@@ -2,7 +2,7 @@
 #
 # This file is part of the GROMACS molecular simulation package.
 #
-# Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
+# Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 # and including many others, as listed in the AUTHORS file in the
 # top-level source directory and at http://www.gromacs.org.
@@ -105,7 +105,6 @@ ElectrostaticsList = {
     'None'                    : [],
     'Coulomb'                 : ['rinv','rinvsq'],
     'ReactionField'           : ['rinv','rinvsq'],
-    'GeneralizedBorn'         : ['rinv','r'],
     'CubicSplineTable'        : ['rinv','r','table'],
     'Ewald'                   : ['rinv','rinvsq','r'],
 }
@@ -190,7 +189,6 @@ Abbreviation = {
     'Coulomb'                 : 'Coul',
     'Ewald'                   : 'Ew',
     'ReactionField'           : 'RF',
-    'GeneralizedBorn'         : 'GB',
     'CubicSplineTable'        : 'CSTab',
     'LennardJones'            : 'LJ',
     'Buckingham'              : 'Bham',
@@ -269,7 +267,7 @@ def KeepKernel(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,Kernel
         return 0
 
     # No need for LJ-only water optimization, or water optimization with implicit solvent.
-    if('Water' in KernelGeom[0] and (KernelElec=='None' or 'GeneralizedBorn' in KernelElec)):
+    if('Water' in KernelGeom[0] and KernelElec=='None'):
         return 0
 
     # Non-matching table settings are pointless
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse4_1_double.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse4_1_double.c
deleted file mode 100644
index 316e11ee85..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse4_1_double.c
+++ /dev/null
@@ -1,838 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sse4_1_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_x86_sse4_1_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse4_1_double
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse4_1_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    __m128d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    __m128d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128d          velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128d          vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
-    __m128d          minushalf = _mm_set1_pd(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    __m128d          rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    __m128d          one_sixth   = _mm_set1_pd(1.0/6.0);
-    __m128d          one_twelfth = _mm_set1_pd(1.0/12.0);
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128d          rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m128d          dummy_mask,cutoff_mask;
-    __m128d          signbit   = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
-    __m128d          one     = _mm_set1_pd(1.0);
-    __m128d          two     = _mm_set1_pd(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_pd(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = _mm_set1_pd(kernel_data->table_vdw->scale);
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_pd(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm_setzero_pd();
-        fiy0             = _mm_setzero_pd();
-        fiz0             = _mm_setzero_pd();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_pd(facel,_mm_load1_pd(charge+inr+0));
-        isai0            = _mm_load1_pd(invsqrta+inr+0);
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _mm_setzero_pd();
-        vgbsum           = _mm_setzero_pd();
-        vvdwsum          = _mm_setzero_pd();
-        dvdasum          = _mm_setzero_pd();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_2ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = sse41_invsqrt_d(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_2real_swizzle_pd(charge+jnrA+0,charge+jnrB+0);
-            isaj0            = gmx_mm_load_2real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-            gmx_mm_load_2pair_swizzle_pd(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _mm_mul_pd(r00,vftabscale);
-            vfitab           = _mm_cvttpd_epi32(rt);
-            vfeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-            vfitab           = _mm_slli_epi32(vfitab,3);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-            gbeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,1) +2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(gbeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(gbeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(gbeps,Fp));
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_2real_swizzle_pd(dvda+jnrA,dvda+jnrB,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(velec,rinv00),fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,1) );
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,0) +2);
-            H                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,1) +2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(vfeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(vfeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(vfeps,Fp));
-            vvdw6            = _mm_mul_pd(c6_00,VV);
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(vfeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fvdw6            = _mm_mul_pd(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            vfitab           = _mm_add_epi32(vfitab,ifour);
-            Y                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,1) );
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,0) +2);
-            H                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,1) +2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(vfeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(vfeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(vfeps,Fp));
-            vvdw12           = _mm_mul_pd(c12_00,VV);
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(vfeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fvdw12           = _mm_mul_pd(c12_00,FF);
-            vvdw             = _mm_add_pd(vvdw12,vvdw6);
-            fvdw             = _mm_xor_pd(signbit,_mm_mul_pd(_mm_add_pd(fvdw6,fvdw12),_mm_mul_pd(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _mm_add_pd(velecsum,velec);
-            vgbsum           = _mm_add_pd(vgbsum,vgb);
-            vvdwsum          = _mm_add_pd(vvdwsum,vvdw);
-
-            fscal            = _mm_add_pd(felec,fvdw);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_pd(fscal,dx00);
-            ty               = _mm_mul_pd(fscal,dy00);
-            tz               = _mm_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_pd(fix0,tx);
-            fiy0             = _mm_add_pd(fiy0,ty);
-            fiz0             = _mm_add_pd(fiz0,tz);
-
-            gmx_mm_decrement_1rvec_2ptr_swizzle_pd(f+j_coord_offsetA,f+j_coord_offsetB,tx,ty,tz);
-
-            /* Inner loop uses 92 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_1ptr_swizzle_pd(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = sse41_invsqrt_d(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = _mm_load_sd(charge+jnrA+0);
-            isaj0            = _mm_load_sd(invsqrta+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-            gmx_mm_load_1pair_swizzle_pd(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _mm_mul_pd(r00,vftabscale);
-            vfitab           = _mm_cvttpd_epi32(rt);
-            vfeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-            vfitab           = _mm_slli_epi32(vfitab,3);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-            gbeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(gbeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(gbeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(gbeps,Fp));
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
-            dvdatmp          = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(velec,rinv00),fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,0) );
-            F                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,0) +2);
-            H                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(vfeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(vfeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(vfeps,Fp));
-            vvdw6            = _mm_mul_pd(c6_00,VV);
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(vfeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fvdw6            = _mm_mul_pd(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            vfitab           = _mm_add_epi32(vfitab,ifour);
-            Y                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,0) );
-            F                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,0) +2);
-            H                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(vfeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(vfeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(vfeps,Fp));
-            vvdw12           = _mm_mul_pd(c12_00,VV);
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(vfeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fvdw12           = _mm_mul_pd(c12_00,FF);
-            vvdw             = _mm_add_pd(vvdw12,vvdw6);
-            fvdw             = _mm_xor_pd(signbit,_mm_mul_pd(_mm_add_pd(fvdw6,fvdw12),_mm_mul_pd(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
-            velecsum         = _mm_add_pd(velecsum,velec);
-            vgb              = _mm_unpacklo_pd(vgb,_mm_setzero_pd());
-            vgbsum           = _mm_add_pd(vgbsum,vgb);
-            vvdw             = _mm_unpacklo_pd(vvdw,_mm_setzero_pd());
-            vvdwsum          = _mm_add_pd(vvdwsum,vvdw);
-
-            fscal            = _mm_add_pd(felec,fvdw);
-
-            fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_pd(fscal,dx00);
-            ty               = _mm_mul_pd(fscal,dy00);
-            tz               = _mm_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_pd(fix0,tx);
-            fiy0             = _mm_add_pd(fiy0,ty);
-            fiz0             = _mm_add_pd(fiz0,tz);
-
-            gmx_mm_decrement_1rvec_1ptr_swizzle_pd(f+j_coord_offsetA,tx,ty,tz);
-
-            /* Inner loop uses 92 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_pd(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_mm_update_1pot_pd(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_mm_update_1pot_pd(vgbsum,kernel_data->energygrp_polarization+ggid);
-        gmx_mm_update_1pot_pd(vvdwsum,kernel_data->energygrp_vdw+ggid);
-        dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai0,isai0));
-        gmx_mm_update_1pot_pd(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 10 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*92);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse4_1_double
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse4_1_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    __m128d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    __m128d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128d          velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128d          vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
-    __m128d          minushalf = _mm_set1_pd(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    __m128d          rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    __m128d          one_sixth   = _mm_set1_pd(1.0/6.0);
-    __m128d          one_twelfth = _mm_set1_pd(1.0/12.0);
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128d          rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m128d          dummy_mask,cutoff_mask;
-    __m128d          signbit   = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
-    __m128d          one     = _mm_set1_pd(1.0);
-    __m128d          two     = _mm_set1_pd(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_pd(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = _mm_set1_pd(kernel_data->table_vdw->scale);
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_pd(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm_setzero_pd();
-        fiy0             = _mm_setzero_pd();
-        fiz0             = _mm_setzero_pd();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_pd(facel,_mm_load1_pd(charge+inr+0));
-        isai0            = _mm_load1_pd(invsqrta+inr+0);
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        dvdasum          = _mm_setzero_pd();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_2ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = sse41_invsqrt_d(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_2real_swizzle_pd(charge+jnrA+0,charge+jnrB+0);
-            isaj0            = gmx_mm_load_2real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-            gmx_mm_load_2pair_swizzle_pd(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _mm_mul_pd(r00,vftabscale);
-            vfitab           = _mm_cvttpd_epi32(rt);
-            vfeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-            vfitab           = _mm_slli_epi32(vfitab,3);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-            gbeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,1) +2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(gbeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(gbeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(gbeps,Fp));
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_2real_swizzle_pd(dvda+jnrA,dvda+jnrB,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(velec,rinv00),fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,1) );
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,0) +2);
-            H                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,1) +2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(vfeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(vfeps,_mm_add_pd(G,Heps)));
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(vfeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fvdw6            = _mm_mul_pd(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            vfitab           = _mm_add_epi32(vfitab,ifour);
-            Y                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,1) );
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,0) +2);
-            H                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,1) +2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(vfeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(vfeps,_mm_add_pd(G,Heps)));
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(vfeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fvdw12           = _mm_mul_pd(c12_00,FF);
-            fvdw             = _mm_xor_pd(signbit,_mm_mul_pd(_mm_add_pd(fvdw6,fvdw12),_mm_mul_pd(vftabscale,rinv00)));
-
-            fscal            = _mm_add_pd(felec,fvdw);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_pd(fscal,dx00);
-            ty               = _mm_mul_pd(fscal,dy00);
-            tz               = _mm_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_pd(fix0,tx);
-            fiy0             = _mm_add_pd(fiy0,ty);
-            fiz0             = _mm_add_pd(fiz0,tz);
-
-            gmx_mm_decrement_1rvec_2ptr_swizzle_pd(f+j_coord_offsetA,f+j_coord_offsetB,tx,ty,tz);
-
-            /* Inner loop uses 82 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_1ptr_swizzle_pd(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = sse41_invsqrt_d(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = _mm_load_sd(charge+jnrA+0);
-            isaj0            = _mm_load_sd(invsqrta+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-            gmx_mm_load_1pair_swizzle_pd(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _mm_mul_pd(r00,vftabscale);
-            vfitab           = _mm_cvttpd_epi32(rt);
-            vfeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-            vfitab           = _mm_slli_epi32(vfitab,3);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-            gbeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(gbeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(gbeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(gbeps,Fp));
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
-            dvdatmp          = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(velec,rinv00),fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,0) );
-            F                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,0) +2);
-            H                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(vfeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(vfeps,_mm_add_pd(G,Heps)));
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(vfeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fvdw6            = _mm_mul_pd(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            vfitab           = _mm_add_epi32(vfitab,ifour);
-            Y                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,0) );
-            F                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( vftab + gmx_mm_extract_epi32(vfitab,0) +2);
-            H                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(vfeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(vfeps,_mm_add_pd(G,Heps)));
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(vfeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fvdw12           = _mm_mul_pd(c12_00,FF);
-            fvdw             = _mm_xor_pd(signbit,_mm_mul_pd(_mm_add_pd(fvdw6,fvdw12),_mm_mul_pd(vftabscale,rinv00)));
-
-            fscal            = _mm_add_pd(felec,fvdw);
-
-            fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_pd(fscal,dx00);
-            ty               = _mm_mul_pd(fscal,dy00);
-            tz               = _mm_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_pd(fix0,tx);
-            fiy0             = _mm_add_pd(fiy0,ty);
-            fiz0             = _mm_add_pd(fiz0,tz);
-
-            gmx_mm_decrement_1rvec_1ptr_swizzle_pd(f+j_coord_offsetA,tx,ty,tz);
-
-            /* Inner loop uses 82 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_pd(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai0,isai0));
-        gmx_mm_update_1pot_pd(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*82);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sse4_1_double.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sse4_1_double.c
deleted file mode 100644
index 3a00b01263..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sse4_1_double.c
+++ /dev/null
@@ -1,728 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sse4_1_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_x86_sse4_1_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse4_1_double
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse4_1_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    __m128d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    __m128d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128d          velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128d          vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
-    __m128d          minushalf = _mm_set1_pd(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    __m128d          rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    __m128d          one_sixth   = _mm_set1_pd(1.0/6.0);
-    __m128d          one_twelfth = _mm_set1_pd(1.0/12.0);
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128d          rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m128d          dummy_mask,cutoff_mask;
-    __m128d          signbit   = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
-    __m128d          one     = _mm_set1_pd(1.0);
-    __m128d          two     = _mm_set1_pd(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_pd(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_pd(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm_setzero_pd();
-        fiy0             = _mm_setzero_pd();
-        fiz0             = _mm_setzero_pd();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_pd(facel,_mm_load1_pd(charge+inr+0));
-        isai0            = _mm_load1_pd(invsqrta+inr+0);
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _mm_setzero_pd();
-        vgbsum           = _mm_setzero_pd();
-        vvdwsum          = _mm_setzero_pd();
-        dvdasum          = _mm_setzero_pd();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_2ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = sse41_invsqrt_d(rsq00);
-
-            rinvsq00         = _mm_mul_pd(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_2real_swizzle_pd(charge+jnrA+0,charge+jnrB+0);
-            isaj0            = gmx_mm_load_2real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-            gmx_mm_load_2pair_swizzle_pd(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-            gbeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,1) +2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(gbeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(gbeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(gbeps,Fp));
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_2real_swizzle_pd(dvda+jnrA,dvda+jnrB,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(velec,rinv00),fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _mm_mul_pd(c6_00,rinvsix);
-            vvdw12           = _mm_mul_pd(c12_00,_mm_mul_pd(rinvsix,rinvsix));
-            vvdw             = _mm_sub_pd( _mm_mul_pd(vvdw12,one_twelfth) , _mm_mul_pd(vvdw6,one_sixth) );
-            fvdw             = _mm_mul_pd(_mm_sub_pd(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _mm_add_pd(velecsum,velec);
-            vgbsum           = _mm_add_pd(vgbsum,vgb);
-            vvdwsum          = _mm_add_pd(vvdwsum,vvdw);
-
-            fscal            = _mm_add_pd(felec,fvdw);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_pd(fscal,dx00);
-            ty               = _mm_mul_pd(fscal,dy00);
-            tz               = _mm_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_pd(fix0,tx);
-            fiy0             = _mm_add_pd(fiy0,ty);
-            fiz0             = _mm_add_pd(fiz0,tz);
-
-            gmx_mm_decrement_1rvec_2ptr_swizzle_pd(f+j_coord_offsetA,f+j_coord_offsetB,tx,ty,tz);
-
-            /* Inner loop uses 71 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_1ptr_swizzle_pd(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = sse41_invsqrt_d(rsq00);
-
-            rinvsq00         = _mm_mul_pd(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _mm_load_sd(charge+jnrA+0);
-            isaj0            = _mm_load_sd(invsqrta+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-            gmx_mm_load_1pair_swizzle_pd(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-            gbeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(gbeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(gbeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(gbeps,Fp));
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
-            dvdatmp          = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(velec,rinv00),fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _mm_mul_pd(c6_00,rinvsix);
-            vvdw12           = _mm_mul_pd(c12_00,_mm_mul_pd(rinvsix,rinvsix));
-            vvdw             = _mm_sub_pd( _mm_mul_pd(vvdw12,one_twelfth) , _mm_mul_pd(vvdw6,one_sixth) );
-            fvdw             = _mm_mul_pd(_mm_sub_pd(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
-            velecsum         = _mm_add_pd(velecsum,velec);
-            vgb              = _mm_unpacklo_pd(vgb,_mm_setzero_pd());
-            vgbsum           = _mm_add_pd(vgbsum,vgb);
-            vvdw             = _mm_unpacklo_pd(vvdw,_mm_setzero_pd());
-            vvdwsum          = _mm_add_pd(vvdwsum,vvdw);
-
-            fscal            = _mm_add_pd(felec,fvdw);
-
-            fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_pd(fscal,dx00);
-            ty               = _mm_mul_pd(fscal,dy00);
-            tz               = _mm_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_pd(fix0,tx);
-            fiy0             = _mm_add_pd(fiy0,ty);
-            fiz0             = _mm_add_pd(fiz0,tz);
-
-            gmx_mm_decrement_1rvec_1ptr_swizzle_pd(f+j_coord_offsetA,tx,ty,tz);
-
-            /* Inner loop uses 71 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_pd(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_mm_update_1pot_pd(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_mm_update_1pot_pd(vgbsum,kernel_data->energygrp_polarization+ggid);
-        gmx_mm_update_1pot_pd(vvdwsum,kernel_data->energygrp_vdw+ggid);
-        dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai0,isai0));
-        gmx_mm_update_1pot_pd(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 10 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*71);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse4_1_double
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse4_1_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    __m128d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    __m128d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128d          velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128d          vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
-    __m128d          minushalf = _mm_set1_pd(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    __m128d          rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    __m128d          one_sixth   = _mm_set1_pd(1.0/6.0);
-    __m128d          one_twelfth = _mm_set1_pd(1.0/12.0);
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128d          rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m128d          dummy_mask,cutoff_mask;
-    __m128d          signbit   = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
-    __m128d          one     = _mm_set1_pd(1.0);
-    __m128d          two     = _mm_set1_pd(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_pd(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_pd(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm_setzero_pd();
-        fiy0             = _mm_setzero_pd();
-        fiz0             = _mm_setzero_pd();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_pd(facel,_mm_load1_pd(charge+inr+0));
-        isai0            = _mm_load1_pd(invsqrta+inr+0);
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        dvdasum          = _mm_setzero_pd();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_2ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = sse41_invsqrt_d(rsq00);
-
-            rinvsq00         = _mm_mul_pd(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_2real_swizzle_pd(charge+jnrA+0,charge+jnrB+0);
-            isaj0            = gmx_mm_load_2real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-            gmx_mm_load_2pair_swizzle_pd(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-            gbeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,1) +2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(gbeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(gbeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(gbeps,Fp));
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_2real_swizzle_pd(dvda+jnrA,dvda+jnrB,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(velec,rinv00),fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(c12_00,rinvsix),c6_00),_mm_mul_pd(rinvsix,rinvsq00));
-
-            fscal            = _mm_add_pd(felec,fvdw);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_pd(fscal,dx00);
-            ty               = _mm_mul_pd(fscal,dy00);
-            tz               = _mm_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_pd(fix0,tx);
-            fiy0             = _mm_add_pd(fiy0,ty);
-            fiz0             = _mm_add_pd(fiz0,tz);
-
-            gmx_mm_decrement_1rvec_2ptr_swizzle_pd(f+j_coord_offsetA,f+j_coord_offsetB,tx,ty,tz);
-
-            /* Inner loop uses 64 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_1ptr_swizzle_pd(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = sse41_invsqrt_d(rsq00);
-
-            rinvsq00         = _mm_mul_pd(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _mm_load_sd(charge+jnrA+0);
-            isaj0            = _mm_load_sd(invsqrta+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-            gmx_mm_load_1pair_swizzle_pd(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-            gbeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(gbeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(gbeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(gbeps,Fp));
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
-            dvdatmp          = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(velec,rinv00),fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(c12_00,rinvsix),c6_00),_mm_mul_pd(rinvsix,rinvsq00));
-
-            fscal            = _mm_add_pd(felec,fvdw);
-
-            fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_pd(fscal,dx00);
-            ty               = _mm_mul_pd(fscal,dy00);
-            tz               = _mm_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_pd(fix0,tx);
-            fiy0             = _mm_add_pd(fiy0,ty);
-            fiz0             = _mm_add_pd(fiz0,tz);
-
-            gmx_mm_decrement_1rvec_1ptr_swizzle_pd(f+j_coord_offsetA,tx,ty,tz);
-
-            /* Inner loop uses 64 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_pd(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai0,isai0));
-        gmx_mm_update_1pot_pd(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*64);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_sse4_1_double.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_sse4_1_double.c
deleted file mode 100644
index b78bfe89e9..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_sse4_1_double.c
+++ /dev/null
@@ -1,657 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sse4_1_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_x86_sse4_1_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse4_1_double
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse4_1_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    __m128d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    __m128d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128d          velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128d          vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
-    __m128d          minushalf = _mm_set1_pd(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128d          rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m128d          dummy_mask,cutoff_mask;
-    __m128d          signbit   = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
-    __m128d          one     = _mm_set1_pd(1.0);
-    __m128d          two     = _mm_set1_pd(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_pd(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_pd(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm_setzero_pd();
-        fiy0             = _mm_setzero_pd();
-        fiz0             = _mm_setzero_pd();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_pd(facel,_mm_load1_pd(charge+inr+0));
-        isai0            = _mm_load1_pd(invsqrta+inr+0);
-
-        /* Reset potential sums */
-        velecsum         = _mm_setzero_pd();
-        vgbsum           = _mm_setzero_pd();
-        dvdasum          = _mm_setzero_pd();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_2ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = sse41_invsqrt_d(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_2real_swizzle_pd(charge+jnrA+0,charge+jnrB+0);
-            isaj0            = gmx_mm_load_2real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-            gbeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,1) +2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(gbeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(gbeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(gbeps,Fp));
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_2real_swizzle_pd(dvda+jnrA,dvda+jnrB,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(velec,rinv00),fgb),rinv00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _mm_add_pd(velecsum,velec);
-            vgbsum           = _mm_add_pd(vgbsum,vgb);
-
-            fscal            = felec;
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_pd(fscal,dx00);
-            ty               = _mm_mul_pd(fscal,dy00);
-            tz               = _mm_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_pd(fix0,tx);
-            fiy0             = _mm_add_pd(fiy0,ty);
-            fiz0             = _mm_add_pd(fiz0,tz);
-
-            gmx_mm_decrement_1rvec_2ptr_swizzle_pd(f+j_coord_offsetA,f+j_coord_offsetB,tx,ty,tz);
-
-            /* Inner loop uses 58 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_1ptr_swizzle_pd(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = sse41_invsqrt_d(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = _mm_load_sd(charge+jnrA+0);
-            isaj0            = _mm_load_sd(invsqrta+jnrA+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-            gbeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(gbeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(gbeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(gbeps,Fp));
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
-            dvdatmp          = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(velec,rinv00),fgb),rinv00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
-            velecsum         = _mm_add_pd(velecsum,velec);
-            vgb              = _mm_unpacklo_pd(vgb,_mm_setzero_pd());
-            vgbsum           = _mm_add_pd(vgbsum,vgb);
-
-            fscal            = felec;
-
-            fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_pd(fscal,dx00);
-            ty               = _mm_mul_pd(fscal,dy00);
-            tz               = _mm_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_pd(fix0,tx);
-            fiy0             = _mm_add_pd(fiy0,ty);
-            fiz0             = _mm_add_pd(fiz0,tz);
-
-            gmx_mm_decrement_1rvec_1ptr_swizzle_pd(f+j_coord_offsetA,tx,ty,tz);
-
-            /* Inner loop uses 58 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_pd(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_mm_update_1pot_pd(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_mm_update_1pot_pd(vgbsum,kernel_data->energygrp_polarization+ggid);
-        dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai0,isai0));
-        gmx_mm_update_1pot_pd(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 9 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*9 + inneriter*58);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse4_1_double
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse4_1_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    __m128d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    __m128d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128d          velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128d          vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
-    __m128d          minushalf = _mm_set1_pd(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128d          rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m128d          dummy_mask,cutoff_mask;
-    __m128d          signbit   = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
-    __m128d          one     = _mm_set1_pd(1.0);
-    __m128d          two     = _mm_set1_pd(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_pd(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_pd(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm_setzero_pd();
-        fiy0             = _mm_setzero_pd();
-        fiz0             = _mm_setzero_pd();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_pd(facel,_mm_load1_pd(charge+inr+0));
-        isai0            = _mm_load1_pd(invsqrta+inr+0);
-
-        dvdasum          = _mm_setzero_pd();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_2ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = sse41_invsqrt_d(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_2real_swizzle_pd(charge+jnrA+0,charge+jnrB+0);
-            isaj0            = gmx_mm_load_2real_swizzle_pd(invsqrta+jnrA+0,invsqrta+jnrB+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-            gbeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,1) +2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(gbeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(gbeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(gbeps,Fp));
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_2real_swizzle_pd(dvda+jnrA,dvda+jnrB,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(velec,rinv00),fgb),rinv00);
-
-            fscal            = felec;
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_pd(fscal,dx00);
-            ty               = _mm_mul_pd(fscal,dy00);
-            tz               = _mm_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_pd(fix0,tx);
-            fiy0             = _mm_add_pd(fiy0,ty);
-            fiz0             = _mm_add_pd(fiz0,tz);
-
-            gmx_mm_decrement_1rvec_2ptr_swizzle_pd(f+j_coord_offsetA,f+j_coord_offsetB,tx,ty,tz);
-
-            /* Inner loop uses 56 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_1ptr_swizzle_pd(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_pd(ix0,jx0);
-            dy00             = _mm_sub_pd(iy0,jy0);
-            dz00             = _mm_sub_pd(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
-
-            rinv00           = sse41_invsqrt_d(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = _mm_load_sd(charge+jnrA+0);
-            isaj0            = _mm_load_sd(invsqrta+jnrA+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_pd(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_pd(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai0,isaj0);
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq00,_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r00,gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-            gbeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) +2);
-            H                = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(gbeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(gbeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(gbeps,Fp));
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
-            dvdatmp          = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
-            velec            = _mm_mul_pd(qq00,rinv00);
-            felec            = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(velec,rinv00),fgb),rinv00);
-
-            fscal            = felec;
-
-            fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_pd(fscal,dx00);
-            ty               = _mm_mul_pd(fscal,dy00);
-            tz               = _mm_mul_pd(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_pd(fix0,tx);
-            fiy0             = _mm_add_pd(fiy0,ty);
-            fiz0             = _mm_add_pd(fiz0,tz);
-
-            gmx_mm_decrement_1rvec_1ptr_swizzle_pd(f+j_coord_offsetA,tx,ty,tz);
-
-            /* Inner loop uses 56 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_pd(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai0,isai0));
-        gmx_mm_update_1pot_pd(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*7 + inneriter*56);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_sse4_1_double.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_sse4_1_double.c
index 9980188393..533c80963c 100644
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_sse4_1_double.c
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_sse4_1_double.c
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -51,6 +51,36 @@ nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sse4_1_double;
 nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sse4_1_double;
 nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sse4_1_double;
 nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse4_1_double;
 nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sse4_1_double;
 nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sse4_1_double;
 nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sse4_1_double;
@@ -141,72 +171,6 @@ nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sse4_1_double;
 nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sse4_1_double;
 nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sse4_1_double;
 nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse4_1_double;
-nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse4_1_double;
-nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse4_1_double;
 nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sse4_1_double;
 nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sse4_1_double;
 nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sse4_1_double;
@@ -277,6 +241,36 @@ nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sse4_1_double;
 nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sse4_1_double;
 nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sse4_1_double;
 nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse4_1_double;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse4_1_double;
 
 
 nb_kernel_info_t
@@ -294,6 +288,36 @@ nb_kernel_info_t
     { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sse4_1_double", "sse4_1_double", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
     { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sse4_1_double", "sse4_1_double", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
     { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "PotentialAndForce" },
     { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sse4_1_double", "sse4_1_double", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "Ewald", "None", "LJEwald", "None", "Water3Particle", "", "PotentialAndForce" },
@@ -384,72 +408,6 @@ nb_kernel_info_t
     { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sse4_1_double", "sse4_1_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "Force" },
     { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "PotentialAndForce" },
     { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sse4_1_double", "sse4_1_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse4_1_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse4_1_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse4_1_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse4_1_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse4_1_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse4_1_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse4_1_double", "sse4_1_double", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse4_1_double", "sse4_1_double", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse4_1_double", "sse4_1_double", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
     { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
@@ -519,7 +477,37 @@ nb_kernel_info_t
     { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
     { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sse4_1_double", "sse4_1_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
     { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse4_1_double", "sse4_1_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }
+    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse4_1_double", "sse4_1_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse4_1_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse4_1_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse4_1_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse4_1_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse4_1_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse4_1_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }
 };
 
 int
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_template_sse4_1_double.pre b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_template_sse4_1_double.pre
index 2115c8ef5e..2fc4881f0e 100644
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_template_sse4_1_double.pre
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_template_sse4_1_double.pre
@@ -2,7 +2,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -128,12 +128,6 @@ void
     __m128d          velec,felec,velecsum,facel,crf,krf,krf2;
     real             *charge;
     /* #endif */
-    /* #if 'GeneralizedBorn' in KERNEL_ELEC */
-    __m128i          gbitab;
-    __m128d          vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
-    __m128d          minushalf = _mm_set1_pd(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    /* #endif */
     /* #if KERNEL_VDW != 'None' */
     int              nvdwtype;
     __m128d          rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
@@ -142,7 +136,7 @@ void
     __m128d          one_sixth   = _mm_set1_pd(1.0/6.0);
     __m128d          one_twelfth = _mm_set1_pd(1.0/12.0);
     /* #endif */
-    /* #if 'Table' in KERNEL_ELEC or 'GeneralizedBorn' in KERNEL_ELEC or 'Table' in KERNEL_VDW */
+    /* #if 'Table' in KERNEL_ELEC or 'Table' in KERNEL_VDW */
     __m128i          vfitab;
     __m128i          ifour       = _mm_set1_epi32(4);
     __m128d          rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
@@ -226,14 +220,6 @@ void
      /*     #endif */
     /* #endif */
 
-    /* #if KERNEL_ELEC=='GeneralizedBorn' */
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_pd(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_pd((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-    /* #endif */
-
     /* #if 'Water' in GEOMETRY_I */
     /* Setup water-specific parameters */
     inr              = nlist->iinr[0];
@@ -360,9 +346,6 @@ void
         /*     #for I in PARTICLES_ELEC_I */
         iq{I}              = _mm_mul_pd(facel,_mm_load1_pd(charge+inr+{I}));
         /*         #define OUTERFLOPS OUTERFLOPS+1 */
-        /*         #if KERNEL_ELEC=='GeneralizedBorn' */
-        isai{I}            = _mm_load1_pd(invsqrta+inr+{I});
-        /*         #endif */
         /*     #endfor */
         /*     #for I in PARTICLES_VDW_I */
         vdwioffset{I}      = 2*nvdwtype*vdwtype[inr+{I}];
@@ -374,16 +357,10 @@ void
         /*     #if KERNEL_ELEC != 'None' */
         velecsum         = _mm_setzero_pd();
         /*     #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC */
-        vgbsum           = _mm_setzero_pd();
-        /*     #endif */
         /*     #if KERNEL_VDW != 'None' */
         vvdwsum          = _mm_setzero_pd();
         /*     #endif */
         /* #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */
-        dvdasum          = _mm_setzero_pd();
-        /*     #endif */
 
         /* #for ROUND in ['Loop','Epilogue'] */
 
@@ -490,13 +467,6 @@ void
             /*         #else */
             jq{J}              = _mm_load_sd(charge+jnrA+{J});
             /*         #endif */
-            /*         #if KERNEL_ELEC=='GeneralizedBorn' */
-            /*             #if ROUND =='Loop' */
-            isaj{J}            = gmx_mm_load_2real_swizzle_pd(invsqrta+jnrA+{J},invsqrta+jnrB+{J});
-            /*             #else */
-            isaj{J}            = _mm_load_sd(invsqrta+jnrA+{J});
-            /*             #endif */
-            /*         #endif */
             /*     #endfor */
             /*     #for J in PARTICLES_VDW_J */
             vdwjidx{J}A        = 2*vdwtype[jnrA+{J}];
@@ -603,64 +573,6 @@ void
             /*                 #define INNERFLOPS INNERFLOPS+3 */
             /*             #endif */
 
-            /*         #elif KERNEL_ELEC=='GeneralizedBorn' */
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_pd(isai{I},isaj{J});
-            gbqqfactor       = _mm_xor_pd(signbit,_mm_mul_pd(qq{I}{J},_mm_mul_pd(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_pd(isaprod,gbtabscale);
-            /*             #define INNERFLOPS INNERFLOPS+5 */
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_pd(r{I}{J},gbscale);
-            gbitab           = _mm_cvttpd_epi32(rt);
-            gbeps            = _mm_sub_pd(rt,_mm_round_pd(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-
-            Y                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            /*             #if ROUND == 'Loop' */
-            F                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            /*             #else */
-            F                = _mm_setzero_pd();
-            /*             #endif */
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,0) +2);
-            /*             #if ROUND == 'Loop' */
-            H                = _mm_load_pd( gbtab + gmx_mm_extract_epi32(gbitab,1) +2);
-            /*             #else */
-            H                = _mm_setzero_pd();
-            /*             #endif */
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            Heps             = _mm_mul_pd(gbeps,H);
-            Fp               = _mm_add_pd(F,_mm_mul_pd(gbeps,_mm_add_pd(G,Heps)));
-            VV               = _mm_add_pd(Y,_mm_mul_pd(gbeps,Fp));
-            vgb              = _mm_mul_pd(gbqqfactor,VV);
-            /*             #define INNERFLOPS INNERFLOPS+10 */
-
-            /*             #if 'Force' in KERNEL_VF */
-            FF               = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
-            fgb              = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
-            dvdatmp          = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r{I}{J})));
-            /*                 #if ROUND == 'Epilogue' */
-            dvdatmp          = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
-            /*                 #endif */
-            dvdasum          = _mm_add_pd(dvdasum,dvdatmp);
-            /*             #if ROUND == 'Loop' */
-            gmx_mm_increment_2real_swizzle_pd(dvda+jnrA,dvda+jnrB,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj{J},isaj{J})));
-            /*             #else */
-            gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj{J},isaj{J})));
-            /*             #endif */
-            /*                 #define INNERFLOPS INNERFLOPS+13 */
-            /*             #endif */
-            velec            = _mm_mul_pd(qq{I}{J},rinv{I}{J});
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #if 'Force' in KERNEL_VF */
-            felec            = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(velec,rinv{I}{J}),fgb),rinv{I}{J});
-            /*                 #define INNERFLOPS INNERFLOPS+3 */
-            /*             #endif */
-
             /*         #elif KERNEL_ELEC=='Ewald' */
             /* EWALD ELECTROSTATICS */
 
@@ -946,17 +858,6 @@ void
             /*             #endif */
             velecsum         = _mm_add_pd(velecsum,velec);
             /*             #define INNERFLOPS INNERFLOPS+1 */
-            /*             #if KERNEL_ELEC=='GeneralizedBorn' */
-            /*             #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */
-            vgb              = _mm_and_pd(vgb,cutoff_mask);
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #endif                                       */
-            /*             #if ROUND == 'Epilogue' */
-            vgb              = _mm_unpacklo_pd(vgb,_mm_setzero_pd());
-            /*             #endif */
-            vgbsum           = _mm_add_pd(vgbsum,vgb);
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #endif */
             /*         #endif */
             /*         #if 'vdw' in INTERACTION_FLAGS[I][J] */
             /*             #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */
@@ -1097,19 +998,11 @@ void
         gmx_mm_update_1pot_pd(velecsum,kernel_data->energygrp_elec+ggid);
         /*         #define OUTERFLOPS OUTERFLOPS+1 */
         /*     #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC */
-        gmx_mm_update_1pot_pd(vgbsum,kernel_data->energygrp_polarization+ggid);
-        /*         #define OUTERFLOPS OUTERFLOPS+1 */
-        /*     #endif */
         /*     #if KERNEL_VDW != 'None' */
         gmx_mm_update_1pot_pd(vvdwsum,kernel_data->energygrp_vdw+ggid);
         /*         #define OUTERFLOPS OUTERFLOPS+1 */
         /*     #endif */
         /* #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */
-        dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai{I},isai{I}));
-        gmx_mm_update_1pot_pd(dvdasum,dvda+inr);
-        /*     #endif */
 
         /* Increment number of inner iterations */
         inneriter                  += j_index_end - j_index_start;
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/make_nb_kernel_sse4_1_single.py b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/make_nb_kernel_sse4_1_single.py
index 1a29580a5e..f9184f4b9f 100755
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/make_nb_kernel_sse4_1_single.py
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/make_nb_kernel_sse4_1_single.py
@@ -2,7 +2,7 @@
 #
 # This file is part of the GROMACS molecular simulation package.
 #
-# Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
+# Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 # and including many others, as listed in the AUTHORS file in the
 # top-level source directory and at http://www.gromacs.org.
@@ -105,7 +105,6 @@ ElectrostaticsList = {
     'None'                    : [],
     'Coulomb'                 : ['rinv','rinvsq'],
     'ReactionField'           : ['rinv','rinvsq'],
-    'GeneralizedBorn'         : ['rinv','r'],
     'CubicSplineTable'        : ['rinv','r','table'],
     'Ewald'                   : ['rinv','rinvsq','r'],
 }
@@ -190,7 +189,6 @@ Abbreviation = {
     'Coulomb'                 : 'Coul',
     'Ewald'                   : 'Ew',
     'ReactionField'           : 'RF',
-    'GeneralizedBorn'         : 'GB',
     'CubicSplineTable'        : 'CSTab',
     'LennardJones'            : 'LJ',
     'Buckingham'              : 'Bham',
@@ -269,7 +267,7 @@ def KeepKernel(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,Kernel
         return 0
 
-    # No need for LJ-only water optimization, or water optimization with implicit solvent.
+    # No need for LJ-only water optimization.
-    if('Water' in KernelGeom[0] and (KernelElec=='None' or 'GeneralizedBorn' in KernelElec)):
+    if('Water' in KernelGeom[0] and KernelElec=='None'):
         return 0
 
     # Non-matching table settings are pointless
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse4_1_single.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse4_1_single.c
deleted file mode 100644
index 88351ac65a..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse4_1_single.c
+++ /dev/null
@@ -1,954 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sse4_1_single kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_x86_sse4_1_single.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse4_1_single
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse4_1_single
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or 
-     * just 0 for non-waters.
-     * Suffixes A,B,C,D refer to j loop unrolling done with SSE, e.g. for the four different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB,jnrC,jnrD;
-    int              jnrlistA,jnrlistB,jnrlistC,jnrlistD;
-    int              j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    real             *fjptrA,*fjptrB,*fjptrC,*fjptrD;
-    real             scratch[4*DIM];
-    __m128           tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128           ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D;
-    __m128           jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128           dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128           velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128           vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp;
-    __m128           minushalf = _mm_set1_ps(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    __m128           rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    __m128           one_sixth   = _mm_set1_ps(1.0/6.0);
-    __m128           one_twelfth = _mm_set1_ps(1.0/12.0);
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128           rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m128           dummy_mask,cutoff_mask;
-    __m128           signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) );
-    __m128           one     = _mm_set1_ps(1.0);
-    __m128           two     = _mm_set1_ps(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_ps(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = _mm_set1_ps(kernel_data->table_vdw->scale);
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_ps(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = jnrC = jnrD = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-    j_coord_offsetC = 0;
-    j_coord_offsetD = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    for(iidx=0;iidx<4*DIM;iidx++)
-    {
-        scratch[iidx] = 0.0;
-    }
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_ps(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm_setzero_ps();
-        fiy0             = _mm_setzero_ps();
-        fiz0             = _mm_setzero_ps();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_ps(facel,_mm_load1_ps(charge+inr+0));
-        isai0            = _mm_load1_ps(invsqrta+inr+0);
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _mm_setzero_ps();
-        vgbsum           = _mm_setzero_ps();
-        vvdwsum          = _mm_setzero_ps();
-        dvdasum          = _mm_setzero_ps();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end && jjnr[jidx+3]>=0; jidx+=4)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            jnrC             = jjnr[jidx+2];
-            jnrD             = jjnr[jidx+3];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = sse41_invsqrt_f(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-            gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,
-                                         vdwparam+vdwioffset0+vdwjidx0C,
-                                         vdwparam+vdwioffset0+vdwjidx0D,
-                                         &c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _mm_mul_ps(r00,vftabscale);
-            vfitab           = _mm_cvttps_epi32(rt);
-            vfeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-            vfitab           = _mm_slli_epi32(vfitab,3);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-            gbeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-            Y                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(gbeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp));
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,1) );
-            G                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,2) );
-            H                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(vfeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(vfeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(vfeps,Fp));
-            vvdw6            = _mm_mul_ps(c6_00,VV);
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(vfeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fvdw6            = _mm_mul_ps(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            vfitab           = _mm_add_epi32(vfitab,ifour);
-            Y                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,1) );
-            G                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,2) );
-            H                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(vfeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(vfeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(vfeps,Fp));
-            vvdw12           = _mm_mul_ps(c12_00,VV);
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(vfeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fvdw12           = _mm_mul_ps(c12_00,FF);
-            vvdw             = _mm_add_ps(vvdw12,vvdw6);
-            fvdw             = _mm_xor_ps(signbit,_mm_mul_ps(_mm_add_ps(fvdw6,fvdw12),_mm_mul_ps(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _mm_add_ps(velecsum,velec);
-            vgbsum           = _mm_add_ps(vgbsum,vgb);
-            vvdwsum          = _mm_add_ps(vvdwsum,vvdw);
-
-            fscal            = _mm_add_ps(felec,fvdw);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_ps(fscal,dx00);
-            ty               = _mm_mul_ps(fscal,dy00);
-            tz               = _mm_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_ps(fix0,tx);
-            fiy0             = _mm_add_ps(fiy0,ty);
-            fiz0             = _mm_add_ps(fiz0,tz);
-
-            fjptrA             = f+j_coord_offsetA;
-            fjptrB             = f+j_coord_offsetB;
-            fjptrC             = f+j_coord_offsetC;
-            fjptrD             = f+j_coord_offsetD;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
-            /* Inner loop uses 92 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrlistA         = jjnr[jidx];
-            jnrlistB         = jjnr[jidx+1];
-            jnrlistC         = jjnr[jidx+2];
-            jnrlistD         = jjnr[jidx+3];
-            /* Sign of each element will be negative for non-real atoms.
-             * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
-             * so use it as val = _mm_andnot_ps(mask,val) to clear dummy entries.
-             */
-            dummy_mask = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
-            jnrA       = (jnrlistA>=0) ? jnrlistA : 0;
-            jnrB       = (jnrlistB>=0) ? jnrlistB : 0;
-            jnrC       = (jnrlistC>=0) ? jnrlistC : 0;
-            jnrD       = (jnrlistD>=0) ? jnrlistD : 0;
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = sse41_invsqrt_f(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-            r00              = _mm_andnot_ps(dummy_mask,r00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-            gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,
-                                         vdwparam+vdwioffset0+vdwjidx0C,
-                                         vdwparam+vdwioffset0+vdwjidx0D,
-                                         &c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _mm_mul_ps(r00,vftabscale);
-            vfitab           = _mm_cvttps_epi32(rt);
-            vfeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-            vfitab           = _mm_slli_epi32(vfitab,3);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-            gbeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-            Y                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(gbeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp));
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
-            dvdatmp          = _mm_andnot_ps(dummy_mask,dvdatmp);
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,1) );
-            G                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,2) );
-            H                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(vfeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(vfeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(vfeps,Fp));
-            vvdw6            = _mm_mul_ps(c6_00,VV);
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(vfeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fvdw6            = _mm_mul_ps(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            vfitab           = _mm_add_epi32(vfitab,ifour);
-            Y                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,1) );
-            G                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,2) );
-            H                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(vfeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(vfeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(vfeps,Fp));
-            vvdw12           = _mm_mul_ps(c12_00,VV);
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(vfeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fvdw12           = _mm_mul_ps(c12_00,FF);
-            vvdw             = _mm_add_ps(vvdw12,vvdw6);
-            fvdw             = _mm_xor_ps(signbit,_mm_mul_ps(_mm_add_ps(fvdw6,fvdw12),_mm_mul_ps(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _mm_andnot_ps(dummy_mask,velec);
-            velecsum         = _mm_add_ps(velecsum,velec);
-            vgb              = _mm_andnot_ps(dummy_mask,vgb);
-            vgbsum           = _mm_add_ps(vgbsum,vgb);
-            vvdw             = _mm_andnot_ps(dummy_mask,vvdw);
-            vvdwsum          = _mm_add_ps(vvdwsum,vvdw);
-
-            fscal            = _mm_add_ps(felec,fvdw);
-
-            fscal            = _mm_andnot_ps(dummy_mask,fscal);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_ps(fscal,dx00);
-            ty               = _mm_mul_ps(fscal,dy00);
-            tz               = _mm_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_ps(fix0,tx);
-            fiy0             = _mm_add_ps(fiy0,ty);
-            fiz0             = _mm_add_ps(fiz0,tz);
-
-            fjptrA             = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
-            fjptrB             = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
-            fjptrC             = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
-            fjptrD             = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
-            /* Inner loop uses 93 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_mm_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_mm_update_1pot_ps(vgbsum,kernel_data->energygrp_polarization+ggid);
-        gmx_mm_update_1pot_ps(vvdwsum,kernel_data->energygrp_vdw+ggid);
-        dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0));
-        gmx_mm_update_1pot_ps(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 10 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*93);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse4_1_single
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse4_1_single
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or 
-     * just 0 for non-waters.
-     * Suffixes A,B,C,D refer to j loop unrolling done with SSE, e.g. for the four different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB,jnrC,jnrD;
-    int              jnrlistA,jnrlistB,jnrlistC,jnrlistD;
-    int              j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    real             *fjptrA,*fjptrB,*fjptrC,*fjptrD;
-    real             scratch[4*DIM];
-    __m128           tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128           ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D;
-    __m128           jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128           dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128           velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128           vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp;
-    __m128           minushalf = _mm_set1_ps(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    __m128           rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    __m128           one_sixth   = _mm_set1_ps(1.0/6.0);
-    __m128           one_twelfth = _mm_set1_ps(1.0/12.0);
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128           rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m128           dummy_mask,cutoff_mask;
-    __m128           signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) );
-    __m128           one     = _mm_set1_ps(1.0);
-    __m128           two     = _mm_set1_ps(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_ps(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = _mm_set1_ps(kernel_data->table_vdw->scale);
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_ps(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = jnrC = jnrD = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-    j_coord_offsetC = 0;
-    j_coord_offsetD = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    for(iidx=0;iidx<4*DIM;iidx++)
-    {
-        scratch[iidx] = 0.0;
-    }
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_ps(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm_setzero_ps();
-        fiy0             = _mm_setzero_ps();
-        fiz0             = _mm_setzero_ps();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_ps(facel,_mm_load1_ps(charge+inr+0));
-        isai0            = _mm_load1_ps(invsqrta+inr+0);
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        dvdasum          = _mm_setzero_ps();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end && jjnr[jidx+3]>=0; jidx+=4)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            jnrC             = jjnr[jidx+2];
-            jnrD             = jjnr[jidx+3];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = sse41_invsqrt_f(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-            gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,
-                                         vdwparam+vdwioffset0+vdwjidx0C,
-                                         vdwparam+vdwioffset0+vdwjidx0D,
-                                         &c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _mm_mul_ps(r00,vftabscale);
-            vfitab           = _mm_cvttps_epi32(rt);
-            vfeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-            vfitab           = _mm_slli_epi32(vfitab,3);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-            gbeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-            Y                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(gbeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp));
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,1) );
-            G                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,2) );
-            H                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(vfeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(vfeps,_mm_add_ps(G,Heps)));
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(vfeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fvdw6            = _mm_mul_ps(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            vfitab           = _mm_add_epi32(vfitab,ifour);
-            Y                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,1) );
-            G                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,2) );
-            H                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(vfeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(vfeps,_mm_add_ps(G,Heps)));
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(vfeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fvdw12           = _mm_mul_ps(c12_00,FF);
-            fvdw             = _mm_xor_ps(signbit,_mm_mul_ps(_mm_add_ps(fvdw6,fvdw12),_mm_mul_ps(vftabscale,rinv00)));
-
-            fscal            = _mm_add_ps(felec,fvdw);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_ps(fscal,dx00);
-            ty               = _mm_mul_ps(fscal,dy00);
-            tz               = _mm_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_ps(fix0,tx);
-            fiy0             = _mm_add_ps(fiy0,ty);
-            fiz0             = _mm_add_ps(fiz0,tz);
-
-            fjptrA             = f+j_coord_offsetA;
-            fjptrB             = f+j_coord_offsetB;
-            fjptrC             = f+j_coord_offsetC;
-            fjptrD             = f+j_coord_offsetD;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
-            /* Inner loop uses 82 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrlistA         = jjnr[jidx];
-            jnrlistB         = jjnr[jidx+1];
-            jnrlistC         = jjnr[jidx+2];
-            jnrlistD         = jjnr[jidx+3];
-            /* Sign of each element will be negative for non-real atoms.
-             * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
-             * so use it as val = _mm_andnot_ps(mask,val) to clear dummy entries.
-             */
-            dummy_mask = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
-            jnrA       = (jnrlistA>=0) ? jnrlistA : 0;
-            jnrB       = (jnrlistB>=0) ? jnrlistB : 0;
-            jnrC       = (jnrlistC>=0) ? jnrlistC : 0;
-            jnrD       = (jnrlistD>=0) ? jnrlistD : 0;
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = sse41_invsqrt_f(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-            r00              = _mm_andnot_ps(dummy_mask,r00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-            gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,
-                                         vdwparam+vdwioffset0+vdwjidx0C,
-                                         vdwparam+vdwioffset0+vdwjidx0D,
-                                         &c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _mm_mul_ps(r00,vftabscale);
-            vfitab           = _mm_cvttps_epi32(rt);
-            vfeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-            vfitab           = _mm_slli_epi32(vfitab,3);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-            gbeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-            Y                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(gbeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp));
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
-            dvdatmp          = _mm_andnot_ps(dummy_mask,dvdatmp);
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,1) );
-            G                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,2) );
-            H                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(vfeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(vfeps,_mm_add_ps(G,Heps)));
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(vfeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fvdw6            = _mm_mul_ps(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            vfitab           = _mm_add_epi32(vfitab,ifour);
-            Y                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,0) );
-            F                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,1) );
-            G                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,2) );
-            H                = _mm_load_ps( vftab + gmx_mm_extract_epi32(vfitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(vfeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(vfeps,_mm_add_ps(G,Heps)));
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(vfeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fvdw12           = _mm_mul_ps(c12_00,FF);
-            fvdw             = _mm_xor_ps(signbit,_mm_mul_ps(_mm_add_ps(fvdw6,fvdw12),_mm_mul_ps(vftabscale,rinv00)));
-
-            fscal            = _mm_add_ps(felec,fvdw);
-
-            fscal            = _mm_andnot_ps(dummy_mask,fscal);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_ps(fscal,dx00);
-            ty               = _mm_mul_ps(fscal,dy00);
-            tz               = _mm_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_ps(fix0,tx);
-            fiy0             = _mm_add_ps(fiy0,ty);
-            fiz0             = _mm_add_ps(fiz0,tz);
-
-            fjptrA             = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
-            fjptrB             = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
-            fjptrC             = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
-            fjptrD             = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
-            /* Inner loop uses 83 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0));
-        gmx_mm_update_1pot_ps(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*83);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sse4_1_single.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sse4_1_single.c
deleted file mode 100644
index 8437ac268d..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sse4_1_single.c
+++ /dev/null
@@ -1,852 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sse4_1_single kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_x86_sse4_1_single.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse4_1_single
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse4_1_single
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or 
-     * just 0 for non-waters.
-     * Suffixes A,B,C,D refer to j loop unrolling done with SSE, e.g. for the four different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB,jnrC,jnrD;
-    int              jnrlistA,jnrlistB,jnrlistC,jnrlistD;
-    int              j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    real             *fjptrA,*fjptrB,*fjptrC,*fjptrD;
-    real             scratch[4*DIM];
-    __m128           tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128           ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D;
-    __m128           jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128           dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128           velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128           vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp;
-    __m128           minushalf = _mm_set1_ps(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    __m128           rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    __m128           one_sixth   = _mm_set1_ps(1.0/6.0);
-    __m128           one_twelfth = _mm_set1_ps(1.0/12.0);
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128           rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m128           dummy_mask,cutoff_mask;
-    __m128           signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) );
-    __m128           one     = _mm_set1_ps(1.0);
-    __m128           two     = _mm_set1_ps(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_ps(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_ps(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = jnrC = jnrD = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-    j_coord_offsetC = 0;
-    j_coord_offsetD = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    for(iidx=0;iidx<4*DIM;iidx++)
-    {
-        scratch[iidx] = 0.0;
-    }
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_ps(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm_setzero_ps();
-        fiy0             = _mm_setzero_ps();
-        fiz0             = _mm_setzero_ps();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_ps(facel,_mm_load1_ps(charge+inr+0));
-        isai0            = _mm_load1_ps(invsqrta+inr+0);
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _mm_setzero_ps();
-        vgbsum           = _mm_setzero_ps();
-        vvdwsum          = _mm_setzero_ps();
-        dvdasum          = _mm_setzero_ps();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end && jjnr[jidx+3]>=0; jidx+=4)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            jnrC             = jjnr[jidx+2];
-            jnrD             = jjnr[jidx+3];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = sse41_invsqrt_f(rsq00);
-
-            rinvsq00         = _mm_mul_ps(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-            gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,
-                                         vdwparam+vdwioffset0+vdwjidx0C,
-                                         vdwparam+vdwioffset0+vdwjidx0D,
-                                         &c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-            gbeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-            Y                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(gbeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp));
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _mm_mul_ps(_mm_mul_ps(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _mm_mul_ps(c6_00,rinvsix);
-            vvdw12           = _mm_mul_ps(c12_00,_mm_mul_ps(rinvsix,rinvsix));
-            vvdw             = _mm_sub_ps( _mm_mul_ps(vvdw12,one_twelfth) , _mm_mul_ps(vvdw6,one_sixth) );
-            fvdw             = _mm_mul_ps(_mm_sub_ps(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _mm_add_ps(velecsum,velec);
-            vgbsum           = _mm_add_ps(vgbsum,vgb);
-            vvdwsum          = _mm_add_ps(vvdwsum,vvdw);
-
-            fscal            = _mm_add_ps(felec,fvdw);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_ps(fscal,dx00);
-            ty               = _mm_mul_ps(fscal,dy00);
-            tz               = _mm_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_ps(fix0,tx);
-            fiy0             = _mm_add_ps(fiy0,ty);
-            fiz0             = _mm_add_ps(fiz0,tz);
-
-            fjptrA             = f+j_coord_offsetA;
-            fjptrB             = f+j_coord_offsetB;
-            fjptrC             = f+j_coord_offsetC;
-            fjptrD             = f+j_coord_offsetD;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
-            /* Inner loop uses 71 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrlistA         = jjnr[jidx];
-            jnrlistB         = jjnr[jidx+1];
-            jnrlistC         = jjnr[jidx+2];
-            jnrlistD         = jjnr[jidx+3];
-            /* Sign of each element will be negative for non-real atoms.
-             * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
-             * so use it as val = _mm_andnot_ps(mask,val) to clear dummy entries.
-             */
-            dummy_mask = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
-            jnrA       = (jnrlistA>=0) ? jnrlistA : 0;
-            jnrB       = (jnrlistB>=0) ? jnrlistB : 0;
-            jnrC       = (jnrlistC>=0) ? jnrlistC : 0;
-            jnrD       = (jnrlistD>=0) ? jnrlistD : 0;
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = sse41_invsqrt_f(rsq00);
-
-            rinvsq00         = _mm_mul_ps(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-            r00              = _mm_andnot_ps(dummy_mask,r00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-            gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,
-                                         vdwparam+vdwioffset0+vdwjidx0C,
-                                         vdwparam+vdwioffset0+vdwjidx0D,
-                                         &c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-            gbeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-            Y                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(gbeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp));
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
-            dvdatmp          = _mm_andnot_ps(dummy_mask,dvdatmp);
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _mm_mul_ps(_mm_mul_ps(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _mm_mul_ps(c6_00,rinvsix);
-            vvdw12           = _mm_mul_ps(c12_00,_mm_mul_ps(rinvsix,rinvsix));
-            vvdw             = _mm_sub_ps( _mm_mul_ps(vvdw12,one_twelfth) , _mm_mul_ps(vvdw6,one_sixth) );
-            fvdw             = _mm_mul_ps(_mm_sub_ps(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _mm_andnot_ps(dummy_mask,velec);
-            velecsum         = _mm_add_ps(velecsum,velec);
-            vgb              = _mm_andnot_ps(dummy_mask,vgb);
-            vgbsum           = _mm_add_ps(vgbsum,vgb);
-            vvdw             = _mm_andnot_ps(dummy_mask,vvdw);
-            vvdwsum          = _mm_add_ps(vvdwsum,vvdw);
-
-            fscal            = _mm_add_ps(felec,fvdw);
-
-            fscal            = _mm_andnot_ps(dummy_mask,fscal);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_ps(fscal,dx00);
-            ty               = _mm_mul_ps(fscal,dy00);
-            tz               = _mm_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_ps(fix0,tx);
-            fiy0             = _mm_add_ps(fiy0,ty);
-            fiz0             = _mm_add_ps(fiz0,tz);
-
-            fjptrA             = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
-            fjptrB             = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
-            fjptrC             = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
-            fjptrD             = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
-            /* Inner loop uses 72 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_mm_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_mm_update_1pot_ps(vgbsum,kernel_data->energygrp_polarization+ggid);
-        gmx_mm_update_1pot_ps(vvdwsum,kernel_data->energygrp_vdw+ggid);
-        dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0));
-        gmx_mm_update_1pot_ps(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 10 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*72);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse4_1_single
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse4_1_single
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or 
-     * just 0 for non-waters.
-     * Suffixes A,B,C,D refer to j loop unrolling done with SSE, e.g. for the four different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB,jnrC,jnrD;
-    int              jnrlistA,jnrlistB,jnrlistC,jnrlistD;
-    int              j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    real             *fjptrA,*fjptrB,*fjptrC,*fjptrD;
-    real             scratch[4*DIM];
-    __m128           tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128           ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D;
-    __m128           jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128           dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128           velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128           vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp;
-    __m128           minushalf = _mm_set1_ps(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    int              nvdwtype;
-    __m128           rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    __m128           one_sixth   = _mm_set1_ps(1.0/6.0);
-    __m128           one_twelfth = _mm_set1_ps(1.0/12.0);
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128           rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m128           dummy_mask,cutoff_mask;
-    __m128           signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) );
-    __m128           one     = _mm_set1_ps(1.0);
-    __m128           two     = _mm_set1_ps(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_ps(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_ps(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = jnrC = jnrD = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-    j_coord_offsetC = 0;
-    j_coord_offsetD = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    for(iidx=0;iidx<4*DIM;iidx++)
-    {
-        scratch[iidx] = 0.0;
-    }
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_ps(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm_setzero_ps();
-        fiy0             = _mm_setzero_ps();
-        fiz0             = _mm_setzero_ps();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_ps(facel,_mm_load1_ps(charge+inr+0));
-        isai0            = _mm_load1_ps(invsqrta+inr+0);
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        dvdasum          = _mm_setzero_ps();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end && jjnr[jidx+3]>=0; jidx+=4)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            jnrC             = jjnr[jidx+2];
-            jnrD             = jjnr[jidx+3];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = sse41_invsqrt_f(rsq00);
-
-            rinvsq00         = _mm_mul_ps(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-            gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,
-                                         vdwparam+vdwioffset0+vdwjidx0C,
-                                         vdwparam+vdwioffset0+vdwjidx0D,
-                                         &c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-            gbeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-            Y                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(gbeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp));
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _mm_mul_ps(_mm_mul_ps(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(c12_00,rinvsix),c6_00),_mm_mul_ps(rinvsix,rinvsq00));
-
-            fscal            = _mm_add_ps(felec,fvdw);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_ps(fscal,dx00);
-            ty               = _mm_mul_ps(fscal,dy00);
-            tz               = _mm_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_ps(fix0,tx);
-            fiy0             = _mm_add_ps(fiy0,ty);
-            fiz0             = _mm_add_ps(fiz0,tz);
-
-            fjptrA             = f+j_coord_offsetA;
-            fjptrB             = f+j_coord_offsetB;
-            fjptrC             = f+j_coord_offsetC;
-            fjptrD             = f+j_coord_offsetD;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
-            /* Inner loop uses 64 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrlistA         = jjnr[jidx];
-            jnrlistB         = jjnr[jidx+1];
-            jnrlistC         = jjnr[jidx+2];
-            jnrlistD         = jjnr[jidx+3];
-            /* Sign of each element will be negative for non-real atoms.
-             * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
-             * so use it as val = _mm_andnot_ps(mask,val) to clear dummy entries.
-             */
-            dummy_mask = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
-            jnrA       = (jnrlistA>=0) ? jnrlistA : 0;
-            jnrB       = (jnrlistB>=0) ? jnrlistB : 0;
-            jnrC       = (jnrlistC>=0) ? jnrlistC : 0;
-            jnrD       = (jnrlistD>=0) ? jnrlistD : 0;
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = sse41_invsqrt_f(rsq00);
-
-            rinvsq00         = _mm_mul_ps(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-            vdwjidx0C        = 2*vdwtype[jnrC+0];
-            vdwjidx0D        = 2*vdwtype[jnrD+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-            r00              = _mm_andnot_ps(dummy_mask,r00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-            gmx_mm_load_4pair_swizzle_ps(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,
-                                         vdwparam+vdwioffset0+vdwjidx0C,
-                                         vdwparam+vdwioffset0+vdwjidx0D,
-                                         &c6_00,&c12_00);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-            gbeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-            Y                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(gbeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp));
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
-            dvdatmp          = _mm_andnot_ps(dummy_mask,dvdatmp);
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _mm_mul_ps(_mm_mul_ps(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(c12_00,rinvsix),c6_00),_mm_mul_ps(rinvsix,rinvsq00));
-
-            fscal            = _mm_add_ps(felec,fvdw);
-
-            fscal            = _mm_andnot_ps(dummy_mask,fscal);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_ps(fscal,dx00);
-            ty               = _mm_mul_ps(fscal,dy00);
-            tz               = _mm_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_ps(fix0,tx);
-            fiy0             = _mm_add_ps(fiy0,ty);
-            fiz0             = _mm_add_ps(fiz0,tz);
-
-            fjptrA             = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
-            fjptrB             = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
-            fjptrC             = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
-            fjptrD             = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
-            /* Inner loop uses 65 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0));
-        gmx_mm_update_1pot_ps(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*65);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_sse4_1_single.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_sse4_1_single.c
deleted file mode 100644
index 3b150dddcb..0000000000
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_sse4_1_single.c
+++ /dev/null
@@ -1,757 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sse4_1_single kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_x86_sse4_1_single.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse4_1_single
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse4_1_single
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or 
-     * just 0 for non-waters.
-     * Suffixes A,B,C,D refer to j loop unrolling done with SSE, e.g. for the four different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB,jnrC,jnrD;
-    int              jnrlistA,jnrlistB,jnrlistC,jnrlistD;
-    int              j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    real             *fjptrA,*fjptrB,*fjptrC,*fjptrD;
-    real             scratch[4*DIM];
-    __m128           tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128           ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D;
-    __m128           jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128           dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128           velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128           vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp;
-    __m128           minushalf = _mm_set1_ps(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128           rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m128           dummy_mask,cutoff_mask;
-    __m128           signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) );
-    __m128           one     = _mm_set1_ps(1.0);
-    __m128           two     = _mm_set1_ps(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_ps(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_ps(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = jnrC = jnrD = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-    j_coord_offsetC = 0;
-    j_coord_offsetD = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    for(iidx=0;iidx<4*DIM;iidx++)
-    {
-        scratch[iidx] = 0.0;
-    }
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_ps(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm_setzero_ps();
-        fiy0             = _mm_setzero_ps();
-        fiz0             = _mm_setzero_ps();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_ps(facel,_mm_load1_ps(charge+inr+0));
-        isai0            = _mm_load1_ps(invsqrta+inr+0);
-
-        /* Reset potential sums */
-        velecsum         = _mm_setzero_ps();
-        vgbsum           = _mm_setzero_ps();
-        dvdasum          = _mm_setzero_ps();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end && jjnr[jidx+3]>=0; jidx+=4)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            jnrC             = jjnr[jidx+2];
-            jnrD             = jjnr[jidx+3];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = sse41_invsqrt_f(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-            gbeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-            Y                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(gbeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp));
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _mm_add_ps(velecsum,velec);
-            vgbsum           = _mm_add_ps(vgbsum,vgb);
-
-            fscal            = felec;
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_ps(fscal,dx00);
-            ty               = _mm_mul_ps(fscal,dy00);
-            tz               = _mm_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_ps(fix0,tx);
-            fiy0             = _mm_add_ps(fiy0,ty);
-            fiz0             = _mm_add_ps(fiz0,tz);
-
-            fjptrA             = f+j_coord_offsetA;
-            fjptrB             = f+j_coord_offsetB;
-            fjptrC             = f+j_coord_offsetC;
-            fjptrD             = f+j_coord_offsetD;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
-            /* Inner loop uses 58 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrlistA         = jjnr[jidx];
-            jnrlistB         = jjnr[jidx+1];
-            jnrlistC         = jjnr[jidx+2];
-            jnrlistD         = jjnr[jidx+3];
-            /* Sign of each element will be negative for non-real atoms.
-             * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
-             * so use it as val = _mm_andnot_ps(mask,val) to clear dummy entries.
-             */
-            dummy_mask = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
-            jnrA       = (jnrlistA>=0) ? jnrlistA : 0;
-            jnrB       = (jnrlistB>=0) ? jnrlistB : 0;
-            jnrC       = (jnrlistC>=0) ? jnrlistC : 0;
-            jnrD       = (jnrlistD>=0) ? jnrlistD : 0;
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = sse41_invsqrt_f(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-            r00              = _mm_andnot_ps(dummy_mask,r00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-            gbeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-            Y                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(gbeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp));
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
-            dvdatmp          = _mm_andnot_ps(dummy_mask,dvdatmp);
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _mm_andnot_ps(dummy_mask,velec);
-            velecsum         = _mm_add_ps(velecsum,velec);
-            vgb              = _mm_andnot_ps(dummy_mask,vgb);
-            vgbsum           = _mm_add_ps(vgbsum,vgb);
-
-            fscal            = felec;
-
-            fscal            = _mm_andnot_ps(dummy_mask,fscal);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_ps(fscal,dx00);
-            ty               = _mm_mul_ps(fscal,dy00);
-            tz               = _mm_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_ps(fix0,tx);
-            fiy0             = _mm_add_ps(fiy0,ty);
-            fiz0             = _mm_add_ps(fiz0,tz);
-
-            fjptrA             = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
-            fjptrB             = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
-            fjptrC             = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
-            fjptrD             = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
-            /* Inner loop uses 59 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_mm_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_mm_update_1pot_ps(vgbsum,kernel_data->energygrp_polarization+ggid);
-        dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0));
-        gmx_mm_update_1pot_ps(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 9 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*9 + inneriter*59);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse4_1_single
- * Electrostatics interaction: GeneralizedBorn
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse4_1_single
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or 
-     * just 0 for non-waters.
-     * Suffixes A,B,C,D refer to j loop unrolling done with SSE, e.g. for the four different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB,jnrC,jnrD;
-    int              jnrlistA,jnrlistB,jnrlistC,jnrlistD;
-    int              j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    real             *fjptrA,*fjptrB,*fjptrC,*fjptrD;
-    real             scratch[4*DIM];
-    __m128           tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    __m128           ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D;
-    __m128           jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    __m128           dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    __m128           velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    __m128i          gbitab;
-    __m128           vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp;
-    __m128           minushalf = _mm_set1_ps(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    __m128i          vfitab;
-    __m128i          ifour       = _mm_set1_epi32(4);
-    __m128           rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
-    real             *vftab;
-    __m128           dummy_mask,cutoff_mask;
-    __m128           signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) );
-    __m128           one     = _mm_set1_ps(1.0);
-    __m128           two     = _mm_set1_ps(2.0);
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = _mm_set1_ps(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_ps(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = jnrC = jnrD = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-    j_coord_offsetC = 0;
-    j_coord_offsetD = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    for(iidx=0;iidx<4*DIM;iidx++)
-    {
-        scratch[iidx] = 0.0;
-    }
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_mm_load_shift_and_1rvec_broadcast_ps(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _mm_setzero_ps();
-        fiy0             = _mm_setzero_ps();
-        fiz0             = _mm_setzero_ps();
-
-        /* Load parameters for i particles */
-        iq0              = _mm_mul_ps(facel,_mm_load1_ps(charge+inr+0));
-        isai0            = _mm_load1_ps(invsqrta+inr+0);
-
-        dvdasum          = _mm_setzero_ps();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end && jjnr[jidx+3]>=0; jidx+=4)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            jnrC             = jjnr[jidx+2];
-            jnrD             = jjnr[jidx+3];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = sse41_invsqrt_f(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-            gbeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-            Y                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(gbeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp));
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00);
-
-            fscal            = felec;
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_ps(fscal,dx00);
-            ty               = _mm_mul_ps(fscal,dy00);
-            tz               = _mm_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_ps(fix0,tx);
-            fiy0             = _mm_add_ps(fiy0,ty);
-            fiz0             = _mm_add_ps(fiz0,tz);
-
-            fjptrA             = f+j_coord_offsetA;
-            fjptrB             = f+j_coord_offsetB;
-            fjptrC             = f+j_coord_offsetC;
-            fjptrD             = f+j_coord_offsetD;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
-            /* Inner loop uses 56 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrlistA         = jjnr[jidx];
-            jnrlistB         = jjnr[jidx+1];
-            jnrlistC         = jjnr[jidx+2];
-            jnrlistD         = jjnr[jidx+3];
-            /* Sign of each element will be negative for non-real atoms.
-             * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
-             * so use it as val = _mm_andnot_ps(mask,val) to clear dummy entries.
-             */
-            dummy_mask = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
-            jnrA       = (jnrlistA>=0) ? jnrlistA : 0;
-            jnrB       = (jnrlistB>=0) ? jnrlistB : 0;
-            jnrC       = (jnrlistC>=0) ? jnrlistC : 0;
-            jnrD       = (jnrlistD>=0) ? jnrlistD : 0;
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-            j_coord_offsetC  = DIM*jnrC;
-            j_coord_offsetD  = DIM*jnrD;
-
-            /* load j atom coordinates */
-            gmx_mm_load_1rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              x+j_coord_offsetC,x+j_coord_offsetD,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _mm_sub_ps(ix0,jx0);
-            dy00             = _mm_sub_ps(iy0,jy0);
-            dz00             = _mm_sub_ps(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
-
-            rinv00           = sse41_invsqrt_f(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
-                                                              charge+jnrC+0,charge+jnrD+0);
-            isaj0            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+0,invsqrta+jnrB+0,
-                                                              invsqrta+jnrC+0,invsqrta+jnrD+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _mm_mul_ps(rsq00,rinv00);
-            r00              = _mm_andnot_ps(dummy_mask,r00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _mm_mul_ps(iq0,jq0);
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai0,isaj0);
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq00,_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r00,gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-            gbeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-            Y                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(gbeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp));
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
-            dvdatmp          = _mm_andnot_ps(dummy_mask,dvdatmp);
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj0,isaj0)));
-            velec            = _mm_mul_ps(qq00,rinv00);
-            felec            = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv00),fgb),rinv00);
-
-            fscal            = felec;
-
-            fscal            = _mm_andnot_ps(dummy_mask,fscal);
-
-            /* Calculate temporary vectorial force */
-            tx               = _mm_mul_ps(fscal,dx00);
-            ty               = _mm_mul_ps(fscal,dy00);
-            tz               = _mm_mul_ps(fscal,dz00);
-
-            /* Update vectorial force */
-            fix0             = _mm_add_ps(fix0,tx);
-            fiy0             = _mm_add_ps(fiy0,ty);
-            fiz0             = _mm_add_ps(fiz0,tz);
-
-            fjptrA             = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
-            fjptrB             = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
-            fjptrC             = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
-            fjptrD             = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
-            gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
-            /* Inner loop uses 57 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_mm_update_iforce_1atom_swizzle_ps(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai0,isai0));
-        gmx_mm_update_1pot_ps(dvdasum,dvda+inr);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*7 + inneriter*57);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.c b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.c
index e2f9d452a5..c339914a02 100644
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.c
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.c
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -51,6 +51,36 @@ nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sse4_1_single;
 nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sse4_1_single;
 nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sse4_1_single;
 nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse4_1_single;
 nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sse4_1_single;
 nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sse4_1_single;
 nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sse4_1_single;
@@ -141,72 +171,6 @@ nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sse4_1_single;
 nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sse4_1_single;
 nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sse4_1_single;
 nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse4_1_single;
-nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse4_1_single;
-nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse4_1_single;
 nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sse4_1_single;
 nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sse4_1_single;
 nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sse4_1_single;
@@ -277,6 +241,36 @@ nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sse4_1_single;
 nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sse4_1_single;
 nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sse4_1_single;
 nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse4_1_single;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse4_1_single;
 
 
 nb_kernel_info_t
@@ -294,6 +288,36 @@ nb_kernel_info_t
     { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sse4_1_single", "sse4_1_single", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
     { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sse4_1_single", "sse4_1_single", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
     { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "PotentialAndForce" },
     { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sse4_1_single", "sse4_1_single", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "Ewald", "None", "LJEwald", "None", "Water3Particle", "", "PotentialAndForce" },
@@ -384,72 +408,6 @@ nb_kernel_info_t
     { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sse4_1_single", "sse4_1_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "Force" },
     { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "PotentialAndForce" },
     { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sse4_1_single", "sse4_1_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse4_1_single", "sse4_1_single", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse4_1_single", "sse4_1_single", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse4_1_single", "sse4_1_single", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
     { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
     { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
@@ -519,7 +477,37 @@ nb_kernel_info_t
     { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
     { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sse4_1_single", "sse4_1_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
     { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse4_1_single", "sse4_1_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }
+    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse4_1_single", "sse4_1_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }
 };
 
 int
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_template_sse4_1_single.pre b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_template_sse4_1_single.pre
index f66ab5e5da..39050e6121 100644
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_template_sse4_1_single.pre
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_template_sse4_1_single.pre
@@ -2,7 +2,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -131,12 +131,6 @@ void
     __m128           velec,felec,velecsum,facel,crf,krf,krf2;
     real             *charge;
     /* #endif */
-    /* #if 'GeneralizedBorn' in KERNEL_ELEC */
-    __m128i          gbitab;
-    __m128           vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,gbeps,dvdatmp;
-    __m128           minushalf = _mm_set1_ps(-0.5);
-    real             *invsqrta,*dvda,*gbtab;
-    /* #endif */
     /* #if KERNEL_VDW != 'None' */
     int              nvdwtype;
     __m128           rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
@@ -145,7 +139,7 @@ void
     __m128           one_sixth   = _mm_set1_ps(1.0/6.0);
     __m128           one_twelfth = _mm_set1_ps(1.0/12.0);
     /* #endif */
-    /* #if 'Table' in KERNEL_ELEC or 'GeneralizedBorn' in KERNEL_ELEC or 'Table' in KERNEL_VDW */
+    /* #if 'Table' in KERNEL_ELEC or 'Table' in KERNEL_VDW */
     __m128i          vfitab;
     __m128i          ifour       = _mm_set1_epi32(4);
     __m128           rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF;
@@ -229,14 +223,6 @@ void
      /*     #endif */
     /* #endif */
 
-    /* #if KERNEL_ELEC=='GeneralizedBorn' */
-    invsqrta         = fr->invsqrta;
-    dvda             = fr->dvda;
-    gbtabscale       = _mm_set1_ps(fr->gbtab->scale);
-    gbtab            = fr->gbtab->data;
-    gbinvepsdiff     = _mm_set1_ps((1.0/fr->ic->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
-    /* #endif */
-
     /* #if 'Water' in GEOMETRY_I */
     /* Setup water-specific parameters */
     inr              = nlist->iinr[0];
@@ -370,9 +356,6 @@ void
         /*     #for I in PARTICLES_ELEC_I */
         iq{I}              = _mm_mul_ps(facel,_mm_load1_ps(charge+inr+{I}));
         /*         #define OUTERFLOPS OUTERFLOPS+1 */
-        /*         #if KERNEL_ELEC=='GeneralizedBorn' */
-        isai{I}            = _mm_load1_ps(invsqrta+inr+{I});
-        /*         #endif */
         /*     #endfor */
         /*     #for I in PARTICLES_VDW_I */
         vdwioffset{I}      = 2*nvdwtype*vdwtype[inr+{I}];
@@ -384,16 +367,10 @@ void
         /*     #if KERNEL_ELEC != 'None' */
         velecsum         = _mm_setzero_ps();
         /*     #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC */
-        vgbsum           = _mm_setzero_ps();
-        /*     #endif */
         /*     #if KERNEL_VDW != 'None' */
         vvdwsum          = _mm_setzero_ps();
         /*     #endif */
         /* #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */
-        dvdasum          = _mm_setzero_ps();
-        /*     #endif */
 
         /* #for ROUND in ['Loop','Epilogue'] */
 
@@ -498,10 +475,6 @@ void
             /*     #for J in PARTICLES_ELEC_J */
             jq{J}              = gmx_mm_load_4real_swizzle_ps(charge+jnrA+{J},charge+jnrB+{J},
                                                               charge+jnrC+{J},charge+jnrD+{J});
-            /*         #if KERNEL_ELEC=='GeneralizedBorn' */
-            isaj{J}            = gmx_mm_load_4real_swizzle_ps(invsqrta+jnrA+{J},invsqrta+jnrB+{J},
-                                                              invsqrta+jnrC+{J},invsqrta+jnrD+{J});
-            /*         #endif */
             /*     #endfor */
             /*     #for J in PARTICLES_VDW_J */
             vdwjidx{J}A        = 2*vdwtype[jnrA+{J}];
@@ -611,62 +584,6 @@ void
             /*                 #define INNERFLOPS INNERFLOPS+3 */
             /*             #endif */
 
-            /*         #elif KERNEL_ELEC=='GeneralizedBorn' */
-
-            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
-            isaprod          = _mm_mul_ps(isai{I},isaj{J});
-            gbqqfactor       = _mm_xor_ps(signbit,_mm_mul_ps(qq{I}{J},_mm_mul_ps(isaprod,gbinvepsdiff)));
-            gbscale          = _mm_mul_ps(isaprod,gbtabscale);
-            /*             #define INNERFLOPS INNERFLOPS+5 */
-
-            /* Calculate generalized born table index - this is a separate table from the normal one,
-             * but we use the same procedure by multiplying r with scale and truncating to integer.
-             */
-            rt               = _mm_mul_ps(r{I}{J},gbscale);
-            gbitab           = _mm_cvttps_epi32(rt);
-            gbeps            = _mm_sub_ps(rt,_mm_round_ps(rt, _MM_FROUND_FLOOR));
-            gbitab           = _mm_slli_epi32(gbitab,2);
-            Y                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,0) );
-            F                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,1) );
-            G                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,2) );
-            H                = _mm_load_ps( gbtab + gmx_mm_extract_epi32(gbitab,3) );
-            _MM_TRANSPOSE4_PS(Y,F,G,H);
-            Heps             = _mm_mul_ps(gbeps,H);
-            Fp               = _mm_add_ps(F,_mm_mul_ps(gbeps,_mm_add_ps(G,Heps)));
-            VV               = _mm_add_ps(Y,_mm_mul_ps(gbeps,Fp));
-            vgb              = _mm_mul_ps(gbqqfactor,VV);
-            /*             #define INNERFLOPS INNERFLOPS+10 */
-
-            /*             #if 'Force' in KERNEL_VF */
-            FF               = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
-            fgb              = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
-            dvdatmp          = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r{I}{J})));
-            /*                 #if ROUND == 'Epilogue' */
-            dvdatmp          = _mm_andnot_ps(dummy_mask,dvdatmp);
-            /*                 #endif */
-            dvdasum          = _mm_add_ps(dvdasum,dvdatmp);
-            /*                 #if ROUND == 'Loop' */
-            fjptrA           = dvda+jnrA;
-            fjptrB           = dvda+jnrB;
-            fjptrC           = dvda+jnrC;
-            fjptrD           = dvda+jnrD;
-            /*                 #else */
-            /* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
-            fjptrA             = (jnrlistA>=0) ? dvda+jnrA : scratch;
-            fjptrB             = (jnrlistB>=0) ? dvda+jnrB : scratch;
-            fjptrC             = (jnrlistC>=0) ? dvda+jnrC : scratch;
-            fjptrD             = (jnrlistD>=0) ? dvda+jnrD : scratch;
-            /*                 #endif */
-            gmx_mm_increment_4real_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,_mm_mul_ps(dvdatmp,_mm_mul_ps(isaj{J},isaj{J})));
-            /*                 #define INNERFLOPS INNERFLOPS+13 */
-            /*             #endif */
-            velec            = _mm_mul_ps(qq{I}{J},rinv{I}{J});
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #if 'Force' in KERNEL_VF */
-            felec            = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(velec,rinv{I}{J}),fgb),rinv{I}{J});
-            /*                 #define INNERFLOPS INNERFLOPS+3 */
-            /*             #endif */
-
             /*         #elif KERNEL_ELEC=='Ewald' */
             /* EWALD ELECTROSTATICS */
 
@@ -913,17 +830,6 @@ void
             /*             #endif */
             velecsum         = _mm_add_ps(velecsum,velec);
             /*             #define INNERFLOPS INNERFLOPS+1 */
-            /*             #if KERNEL_ELEC=='GeneralizedBorn' */
-            /*             #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */
-            vgb              = _mm_and_ps(vgb,cutoff_mask);
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #endif                                       */
-            /*             #if ROUND == 'Epilogue' */
-            vgb              = _mm_andnot_ps(dummy_mask,vgb);
-            /*             #endif */
-            vgbsum           = _mm_add_ps(vgbsum,vgb);
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #endif */
             /*         #endif */
             /*         #if 'vdw' in INTERACTION_FLAGS[I][J] */
             /*     ## Note special check for TIP4P-TIP4P. Since we are cutting of all hydrogen interactions we also cut the LJ-only O-O interaction */
@@ -1075,19 +981,11 @@ void
         gmx_mm_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid);
         /*         #define OUTERFLOPS OUTERFLOPS+1 */
         /*     #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC */
-        gmx_mm_update_1pot_ps(vgbsum,kernel_data->energygrp_polarization+ggid);
-        /*         #define OUTERFLOPS OUTERFLOPS+1 */
-        /*     #endif */
         /*     #if KERNEL_VDW != 'None' */
         gmx_mm_update_1pot_ps(vvdwsum,kernel_data->energygrp_vdw+ggid);
         /*         #define OUTERFLOPS OUTERFLOPS+1 */
         /*     #endif */
         /* #endif */
-        /*     #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */
-        dvdasum = _mm_mul_ps(dvdasum, _mm_mul_ps(isai{I},isai{I}));
-        gmx_mm_update_1pot_ps(dvdasum,dvda+inr);
-        /*     #endif */
 
         /* Increment number of inner iterations */
         inneriter                  += j_index_end - j_index_start;
diff --git a/src/gromacs/gmxlib/nonbonded/nonbonded.cpp b/src/gromacs/gmxlib/nonbonded/nonbonded.cpp
index 2d683c34cb..aba9e8f735 100644
--- a/src/gromacs/gmxlib/nonbonded/nonbonded.cpp
+++ b/src/gromacs/gmxlib/nonbonded/nonbonded.cpp
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -388,7 +388,6 @@ void do_nonbonded(t_forcerec *fr,
                 }
                 kernel_data.energygrp_elec          = grppener->ener[egCOULSR];
                 kernel_data.energygrp_vdw           = grppener->ener[fr->bBHAM ? egBHAMSR : egLJSR];
-                kernel_data.energygrp_polarization  = grppener->ener[egGB];
                 nlist = nblists->nlist_sr;
                 f                                   = f_shortrange;
             }
diff --git a/src/gromacs/gmxlib/nrnb.cpp b/src/gromacs/gmxlib/nrnb.cpp
index 4bc3f67230..ba5ec1da26 100644
--- a/src/gromacs/gmxlib/nrnb.cpp
+++ b/src/gromacs/gmxlib/nrnb.cpp
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -85,7 +85,6 @@ static const t_nrnb_data nbdata[eNRNB] = {
     { "NB Generic charge grp kernel",    1 },
     { "NB Free energy kernel",           1 },
     { "NB All-vs-all",                   1 },
-    { "NB All-vs-all, GB",               1 },
 
     { "Pair Search distance check",      9 }, /* nbnxn pair dist. check */
     /* nbnxn kernel flops are based on inner-loops without exclusion checks.
@@ -120,12 +119,6 @@ static const t_nrnb_data nbdata[eNRNB] = {
     { "NxN LJ add LJ Ewald [F]",        36 }, /* extra cost for LJ Ewald */
     { "NxN LJ add LJ Ewald [V&F]",      33 },
     { "1,4 nonbonded interactions",     90 },
-    { "Born radii (Still)",             47 },
-    { "Born radii (HCT/OBC)",          183 },
-    { "Born force chain rule",          15 },
-    { "All-vs-All Still radii",          1 },
-    { "All-vs-All HCT/OBC radii",        1 },
-    { "All-vs-All Born chain rule",      1 },
     { "Calc Weights",                   36 },
     { "Spread Q",                        6 },
     { "Spread Q Bspline",                2 },
@@ -189,7 +182,6 @@ static const t_nrnb_data nbdata[eNRNB] = {
     { "Virtual Site 4fd",              110 },
     { "Virtual Site 4fdn",             254 },
     { "Virtual Site N",                 15 },
-    { "Mixed Generalized Born stuff",   10 },
     { "CMAP",                         1700 }, // Estimate!
     { "Urey-Bradley",                  183 },
     { "Cross-Bond-Bond",               163 },
@@ -334,7 +326,7 @@ void print_flop(FILE *out, t_nrnb *nrnb, double *nbfs, double *mflop)
     const char   *myline = "-----------------------------------------------------------------------------";
 
     *nbfs = 0.0;
-    for (i = 0; (i < eNR_NBKERNEL_ALLVSALLGB); i++)
+    for (i = 0; (i < eNR_NBKERNEL_TOTAL_NR); i++)
     {
         if (std::strstr(nbdata[i].name, "W3-W3") != nullptr)
         {
@@ -574,7 +566,7 @@ void pr_load(FILE *log, t_commrec *cr, t_nrnb nrnb[])
     {
         add_nrnb(av, av, &(nrnb[i]));
         /* Cost due to forces */
-        for (j = 0; (j < eNR_NBKERNEL_ALLVSALLGB); j++)
+        for (j = 0; (j < eNR_NBKERNEL_TOTAL_NR); j++)
         {
             ftot[i] += nrnb[i].n[j]*cost_nrnb(j);
         }
diff --git a/src/gromacs/gmxlib/nrnb.h b/src/gromacs/gmxlib/nrnb.h
index 5cc1d8a7d5..53ee66765d 100644
--- a/src/gromacs/gmxlib/nrnb.h
+++ b/src/gromacs/gmxlib/nrnb.h
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -75,8 +75,8 @@ enum
     eNR_NBKERNEL_GENERIC_CG,
     eNR_NBKERNEL_FREE_ENERGY,               /* Add other generic kernels _before_ the free energy one */
 
-    eNR_NBKERNEL_ALLVSALL,
-    eNR_NBKERNEL_ALLVSALLGB,
+    eNR_NBKERNEL_TOTAL_NR, // Exclusive upper bound used by the kernel-flop loops in nrnb.cpp (i < eNR_NBKERNEL_TOTAL_NR)
+    eNR_NBKERNEL_ALLVSALL = eNR_NBKERNEL_TOTAL_NR, // Reuses the value of eNR_NBKERNEL_TOTAL_NR instead of taking a new one, so loops bounded by i < eNR_NBKERNEL_TOTAL_NR stop before the all-vs-all counter
 
     eNR_NBNXN_DIST2,
     eNR_NBNXN_LJ_RF,          eNR_NBNXN_LJ_RF_E,
@@ -90,10 +90,6 @@ enum
     eNR_NBNXN_ADD_LJ_PSW,     eNR_NBNXN_ADD_LJ_PSW_E,
     eNR_NBNXN_ADD_LJ_EWALD,   eNR_NBNXN_ADD_LJ_EWALD_E,
     eNR_NB14,
-    eNR_BORN_RADII_STILL,     eNR_BORN_RADII_HCT_OBC,
-    eNR_BORN_CHAINRULE,
-    eNR_BORN_AVA_RADII_STILL, eNR_BORN_AVA_RADII_HCT_OBC,
-    eNR_BORN_AVA_CHAINRULE,
     eNR_WEIGHTS,              eNR_SPREAD,               eNR_SPREADBSP,
     eNR_GATHERF,              eNR_GATHERFBSP,           eNR_FFT,
     eNR_CONV,                 eNR_SOLVEPME, eNR_NS,      eNR_RESETX,
@@ -115,7 +111,7 @@ enum
     eNR_SHAKE_RIJ,            eNR_CONSTR_VIR,           eNR_SETTLE,
     eNR_VSITE2,               eNR_VSITE3,               eNR_VSITE3FD,
     eNR_VSITE3FAD,            eNR_VSITE3OUT,            eNR_VSITE4FD,
-    eNR_VSITE4FDN,            eNR_VSITEN,               eNR_GB,
+    eNR_VSITE4FDN,            eNR_VSITEN,
     eNR_CMAP,                 eNR_UREY_BRADLEY,         eNR_CROSS_BOND_BOND,
     eNR_CROSS_BOND_ANGLE,
     eNRNB
diff --git a/src/gromacs/gmxpreprocess/convparm.cpp b/src/gromacs/gmxpreprocess/convparm.cpp
index 0a2f123d11..619e55b826 100644
--- a/src/gromacs/gmxpreprocess/convparm.cpp
+++ b/src/gromacs/gmxpreprocess/convparm.cpp
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -442,14 +442,9 @@ assign_param(t_functype ftype, t_iparams *newparam,
             newparam->cmap.cmapA = static_cast<int>(old[0]);
             newparam->cmap.cmapB = static_cast<int>(old[1]);
             break;
-        case F_GB12:
-        case F_GB13:
-        case F_GB14:
-            newparam->gb.sar  = old[0];
-            newparam->gb.st   = old[1];
-            newparam->gb.pi   = old[2];
-            newparam->gb.gbr  = old[3];
-            newparam->gb.bmlt = old[4];
+        case F_GB12_NOLONGERUSED:
+        case F_GB13_NOLONGERUSED:
+        case F_GB14_NOLONGERUSED:
             break;
         default:
             gmx_fatal(FARGS, "unknown function type %d in %s line %d",
@@ -478,26 +473,9 @@ static int enter_params(gmx_ffparams_t *ffparams, t_functype ftype,
         {
             if (ffparams->functype[type] == ftype)
             {
-                if (F_GB13 == ftype)
+                if (memcmp(&newparam, &ffparams->iparams[type], (size_t)sizeof(newparam)) == 0)
                 {
-                    /* Occasionally, the way the 1-3 reference distance is
-                     * computed can lead to non-binary-identical results, but I
-                     * don't know why. */
-                    if ((gmx_within_tol(newparam.gb.sar,  ffparams->iparams[type].gb.sar,  1e-6)) &&
-                        (gmx_within_tol(newparam.gb.st,   ffparams->iparams[type].gb.st,   1e-6)) &&
-                        (gmx_within_tol(newparam.gb.pi,   ffparams->iparams[type].gb.pi,   1e-6)) &&
-                        (gmx_within_tol(newparam.gb.gbr,  ffparams->iparams[type].gb.gbr,  1e-6)) &&
-                        (gmx_within_tol(newparam.gb.bmlt, ffparams->iparams[type].gb.bmlt, 1e-6)))
-                    {
-                        return type;
-                    }
-                }
-                else
-                {
-                    if (memcmp(&newparam, &ffparams->iparams[type], (size_t)sizeof(newparam)) == 0)
-                    {
-                        return type;
-                    }
+                    return type;
                 }
             }
         }
diff --git a/src/gromacs/gmxpreprocess/gpp_atomtype.cpp b/src/gromacs/gmxpreprocess/gpp_atomtype.cpp
index 68538e8426..50f7d7f124 100644
--- a/src/gromacs/gmxpreprocess/gpp_atomtype.cpp
+++ b/src/gromacs/gmxpreprocess/gpp_atomtype.cpp
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2011,2014,2015,2017, by the GROMACS development team, led by
+ * Copyright (c) 2011,2014,2015,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -59,11 +59,6 @@ typedef struct gpp_atomtype {
     char          ***atomname;     /* Names of the atomtypes		*/
     t_param         *nb;           /* Nonbonded force default params	*/
     int             *bondatomtype; /* The bond_atomtype for each atomtype  */
-    real            *radius;       /* Radius for GBSA stuff                */
-    real            *vol;          /* Effective volume for GBSA            */
-    real            *surftens;     /* Surface tension with water, for GBSA */
-    real            *gb_radius;    /* Radius for Still model               */
-    real            *S_hct;        /* Overlap factor for HCT model         */
     int             *atomnumber;   /* Atomic number, used for QM/MM        */
 } t_gpp_atomtype;
 
@@ -168,56 +163,6 @@ int get_atomtype_atomnumber(int nt, gpp_atomtype_t ga)
     return ga->atomnumber[nt];
 }
 
-real get_atomtype_radius(int nt, gpp_atomtype_t ga)
-{
-    if ((nt < 0) || (nt >= ga->nr))
-    {
-        return NOTSET;
-    }
-
-    return ga->radius[nt];
-}
-
-real get_atomtype_vol(int nt, gpp_atomtype_t ga)
-{
-    if ((nt < 0) || (nt >= ga->nr))
-    {
-        return NOTSET;
-    }
-
-    return ga->vol[nt];
-}
-
-real get_atomtype_surftens(int nt, gpp_atomtype_t ga)
-{
-    if ((nt < 0) || (nt >= ga->nr))
-    {
-        return NOTSET;
-    }
-
-    return ga->surftens[nt];
-}
-
-real get_atomtype_gb_radius(int nt, gpp_atomtype_t ga)
-{
-    if ((nt < 0) || (nt >= ga->nr))
-    {
-        return NOTSET;
-    }
-
-    return ga->gb_radius[nt];
-}
-
-real get_atomtype_S_hct(int nt, gpp_atomtype_t ga)
-{
-    if ((nt < 0) || (nt >= ga->nr))
-    {
-        return NOTSET;
-    }
-
-    return ga->S_hct[nt];
-}
-
 real get_atomtype_nbparam(int nt, int param, gpp_atomtype_t ga)
 {
     if ((nt < 0) || (nt >= ga->nr))
@@ -242,41 +187,14 @@ gpp_atomtype_t init_atomtype(void)
     ga->atomname     = nullptr;
     ga->nb           = nullptr;
     ga->bondatomtype = nullptr;
-    ga->radius       = nullptr;
-    ga->vol          = nullptr;
-    ga->surftens     = nullptr;
     ga->atomnumber   = nullptr;
-    ga->gb_radius    = nullptr;
-    ga->S_hct        = nullptr;
 
     return ga;
 }
 
-int
-set_atomtype_gbparam(gpp_atomtype_t ga, int i,
-                     real radius, real vol, real surftens,
-                     real gb_radius, real S_hct)
-{
-    if ( (i < 0) || (i >= ga->nr))
-    {
-        return NOTSET;
-    }
-
-    ga->radius[i]    = radius;
-    ga->vol[i]       = vol;
-    ga->surftens[i]  = surftens;
-    ga->gb_radius[i] = gb_radius;
-    ga->S_hct[i]     = S_hct;
-
-    return i;
-}
-
-
 int set_atomtype(int nt, gpp_atomtype_t ga, t_symtab *tab,
                  t_atom *a, const char *name, t_param *nb,
-                 int bondatomtype,
-                 real radius, real vol, real surftens, int atomnumber,
-                 real gb_radius, real S_hct)
+                 int bondatomtype, int atomnumber)
 {
     if ((nt < 0) || (nt >= ga->nr))
     {
@@ -287,21 +205,14 @@ int set_atomtype(int nt, gpp_atomtype_t ga, t_symtab *tab,
     ga->atomname[nt]     = put_symtab(tab, name);
     ga->nb[nt]           = *nb;
     ga->bondatomtype[nt] = bondatomtype;
-    ga->radius[nt]       = radius;
-    ga->vol[nt]          = vol;
-    ga->surftens[nt]     = surftens;
     ga->atomnumber[nt]   = atomnumber;
-    ga->gb_radius[nt]    = gb_radius;
-    ga->S_hct[nt]        = S_hct;
 
     return nt;
 }
 
 int add_atomtype(gpp_atomtype_t ga, t_symtab *tab,
                  t_atom *a, const char *name, t_param *nb,
-                 int bondatomtype,
-                 real radius, real vol, real surftens, int atomnumber,
-                 real gb_radius, real S_hct)
+                 int bondatomtype, int atomnumber)
 {
     int i;
 
@@ -323,15 +234,9 @@ int add_atomtype(gpp_atomtype_t ga, t_symtab *tab,
         srenew(ga->atomname, ga->nr);
         srenew(ga->nb, ga->nr);
         srenew(ga->bondatomtype, ga->nr);
-        srenew(ga->radius, ga->nr);
-        srenew(ga->vol, ga->nr);
-        srenew(ga->surftens, ga->nr);
         srenew(ga->atomnumber, ga->nr);
-        srenew(ga->gb_radius, ga->nr);
-        srenew(ga->S_hct, ga->nr);
 
-        return set_atomtype(ga->nr-1, ga, tab, a, name, nb, bondatomtype, radius,
-                            vol, surftens, atomnumber, gb_radius, S_hct);
+        return set_atomtype(ga->nr-1, ga, tab, a, name, nb, bondatomtype, atomnumber);
     }
     else
     {
@@ -364,11 +269,6 @@ void done_atomtype(gpp_atomtype_t ga)
     sfree(ga->atomname);
     sfree(ga->nb);
     sfree(ga->bondatomtype);
-    sfree(ga->radius);
-    sfree(ga->vol);
-    sfree(ga->gb_radius);
-    sfree(ga->S_hct);
-    sfree(ga->surftens);
     sfree(ga->atomnumber);
     ga->nr = 0;
     sfree(ga);
@@ -404,15 +304,10 @@ static int search_atomtypes(gpp_atomtype_t ga, int *n, int typelist[],
                 bFound = (param[ntype*typelist[i]+j].c[k] == param[ntype*thistype+j].c[k]);
             }
 
-            /* Check radius, volume, surftens */
+            /* Check atomnumber */
             tli    = typelist[i];
             bFound = bFound &&
-                (get_atomtype_radius(tli, ga) == get_atomtype_radius(thistype, ga)) &&
-                (get_atomtype_vol(tli, ga) == get_atomtype_vol(thistype, ga)) &&
-                (get_atomtype_surftens(tli, ga) == get_atomtype_surftens(thistype, ga)) &&
-                (get_atomtype_atomnumber(tli, ga) == get_atomtype_atomnumber(thistype, ga)) &&
-                (get_atomtype_gb_radius(tli, ga) == get_atomtype_gb_radius(thistype, ga)) &&
-                (get_atomtype_S_hct(tli, ga) == get_atomtype_S_hct(thistype, ga));
+                (get_atomtype_atomnumber(tli, ga) == get_atomtype_atomnumber(thistype, ga));
         }
         if (bFound)
         {
@@ -446,11 +341,6 @@ void renum_atype(t_params plist[], gmx_mtop_t *mtop,
     t_atoms    *atoms;
     t_param    *nbsnew;
     int        *typelist;
-    real       *new_radius;
-    real       *new_vol;
-    real       *new_surftens;
-    real       *new_gb_radius;
-    real       *new_S_hct;
     int        *new_atomnumber;
     char     ***new_atomname;
 
@@ -467,10 +357,6 @@ void renum_atype(t_params plist[], gmx_mtop_t *mtop,
      * ones with identical nonbonded interactions, in addition
      * to removing unused ones.
      *
-     * With Generalized-Born electrostatics, or implicit solvent
-     * we also check that the atomtype radius, effective_volume
-     * and surface tension match.
-     *
      * With QM/MM we also check that the atom numbers match
      */
 
@@ -512,12 +398,7 @@ void renum_atype(t_params plist[], gmx_mtop_t *mtop,
         }
     }
 
-    snew(new_radius, nat);
-    snew(new_vol, nat);
-    snew(new_surftens, nat);
     snew(new_atomnumber, nat);
-    snew(new_gb_radius, nat);
-    snew(new_S_hct, nat);
     snew(new_atomname, nat);
 
     /* We now have a list of unique atomtypes in typelist */
@@ -544,12 +425,7 @@ void renum_atype(t_params plist[], gmx_mtop_t *mtop,
                 nbsnew[k].c[l] = plist[ftype].param[ntype*mi+mj].c[l];
             }
         }
-        new_radius[i]     = get_atomtype_radius(mi, ga);
-        new_vol[i]        = get_atomtype_vol(mi, ga);
-        new_surftens[i]   = get_atomtype_surftens(mi, ga);
         new_atomnumber[i] = get_atomtype_atomnumber(mi, ga);
-        new_gb_radius[i]  = get_atomtype_gb_radius(mi, ga);
-        new_S_hct[i]      = get_atomtype_S_hct(mi, ga);
         new_atomname[i]   = ga->atomname[mi];
     }
 
@@ -563,21 +439,11 @@ void renum_atype(t_params plist[], gmx_mtop_t *mtop,
     plist[ftype].nr     = i;
     mtop->ffparams.atnr = nat;
 
-    sfree(ga->radius);
-    sfree(ga->vol);
-    sfree(ga->surftens);
     sfree(ga->atomnumber);
-    sfree(ga->gb_radius);
-    sfree(ga->S_hct);
     /* Dangling atomname pointers ? */
     sfree(ga->atomname);
 
-    ga->radius     = new_radius;
-    ga->vol        = new_vol;
-    ga->surftens   = new_surftens;
     ga->atomnumber = new_atomnumber;
-    ga->gb_radius  = new_gb_radius;
-    ga->S_hct      = new_S_hct;
     ga->atomname   = new_atomname;
 
     ga->nr = nat;
@@ -593,20 +459,10 @@ void copy_atomtype_atomtypes(gpp_atomtype_t ga, t_atomtypes *atomtypes)
     /* Copy the atomtype data to the topology atomtype list */
     ntype         = get_atomtype_ntypes(ga);
     atomtypes->nr = ntype;
-    snew(atomtypes->radius, ntype);
-    snew(atomtypes->vol, ntype);
-    snew(atomtypes->surftens, ntype);
     snew(atomtypes->atomnumber, ntype);
-    snew(atomtypes->gb_radius, ntype);
-    snew(atomtypes->S_hct, ntype);
 
     for (i = 0; i < ntype; i++)
     {
-        atomtypes->radius[i]     = ga->radius[i];
-        atomtypes->vol[i]        = ga->vol[i];
-        atomtypes->surftens[i]   = ga->surftens[i];
         atomtypes->atomnumber[i] = ga->atomnumber[i];
-        atomtypes->gb_radius[i]  = ga->gb_radius[i];
-        atomtypes->S_hct[i]      = ga->S_hct[i];
     }
 }
diff --git a/src/gromacs/gmxpreprocess/gpp_atomtype.h b/src/gromacs/gmxpreprocess/gpp_atomtype.h
index 8197540305..66edce1ee2 100644
--- a/src/gromacs/gmxpreprocess/gpp_atomtype.h
+++ b/src/gromacs/gmxpreprocess/gpp_atomtype.h
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2011,2014,2015, by the GROMACS development team, led by
+ * Copyright (c) 2011,2014,2015,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -60,11 +60,6 @@ real get_atomtype_massA(int nt, gpp_atomtype_t at);
 real get_atomtype_massB(int nt, gpp_atomtype_t at);
 real get_atomtype_qA(int nt, gpp_atomtype_t at);
 real get_atomtype_qB(int nt, gpp_atomtype_t at);
-real get_atomtype_radius(int nt, gpp_atomtype_t at);
-real get_atomtype_vol(int nt, gpp_atomtype_t at);
-real get_atomtype_surftens(int nt, gpp_atomtype_t at);
-real get_atomtype_gb_radius(int nt, gpp_atomtype_t at);
-real get_atomtype_S_hct(int nt, gpp_atomtype_t at);
 int get_atomtype_ptype(int nt, gpp_atomtype_t at);
 int get_atomtype_batype(int nt, gpp_atomtype_t at);
 int get_atomtype_atomnumber(int nt, gpp_atomtype_t at);
@@ -82,22 +77,13 @@ void done_atomtype(gpp_atomtype_t at);
 
 int set_atomtype(int nt, gpp_atomtype_t at, struct t_symtab *tab,
                  t_atom *a, const char *name, t_param *nb,
-                 int bondatomtype,
-                 real radius, real vol, real surftens, int atomnumber,
-                 real gb_radius, real S_hct);
+                 int bondatomtype, int atomnumber);
 /* Set the values of an existing atom type nt. Returns nt on success or
    NOTSET on error. */
 
-int
-set_atomtype_gbparam(gpp_atomtype_t at, int i,
-                     real radius, real vol, real surftens,
-                     real gb_radius, real S_hct);
-
 int add_atomtype(gpp_atomtype_t at, struct t_symtab *tab,
                  t_atom *a, const char *name, t_param *nb,
-                 int bondatomtype,
-                 real radius, real vol, real surftens, int atomnumber,
-                 real gb_radius, real S_hct);
+                 int bondatomtype, int atomnumber);
 /* Add a complete new atom type to an existing atomtype structure. Returns
    the number of the atom type. */
 
diff --git a/src/gromacs/gmxpreprocess/grompp.cpp b/src/gromacs/gmxpreprocess/grompp.cpp
index 30cbe16288..2133aa7194 100644
--- a/src/gromacs/gmxpreprocess/grompp.cpp
+++ b/src/gromacs/gmxpreprocess/grompp.cpp
@@ -77,7 +77,6 @@
 #include "gromacs/mdlib/calc_verletbuf.h"
 #include "gromacs/mdlib/compute_io.h"
 #include "gromacs/mdlib/constr.h"
-#include "gromacs/mdlib/genborn.h"
 #include "gromacs/mdlib/perf_est.h"
 #include "gromacs/mdlib/sim_util.h"
 #include "gromacs/mdrunutility/mdmodules.h"
@@ -520,23 +519,16 @@ new_status(const char *topfile, const char *topppfile, const char *confin,
     gmx_molblock_t *molblock, *molbs;
     int             mb, i, nrmols, nmismatch;
     char            buf[STRLEN];
-    gmx_bool        bGB = FALSE;
     char            warn_buf[STRLEN];
 
     init_mtop(sys);
 
-    /* Set gmx_boolean for GB */
-    if (ir->implicit_solvent)
-    {
-        bGB = TRUE;
-    }
-
     /* TOPOLOGY processing */
     sys->name = do_top(bVerbose, topfile, topppfile, opts, bZero, &(sys->symtab),
                        plist, comb, reppow, fudgeQQ,
                        atype, &nrmols, &molinfo, intermolecular_interactions,
                        ir,
-                       &nmolblock, &molblock, bGB,
+                       &nmolblock, &molblock,
                        wi);
 
     sys->nmolblock = 0;
@@ -1270,73 +1262,6 @@ static int count_constraints(gmx_mtop_t *mtop, t_molinfo *mi, warninp_t wi)
     return count;
 }
 
-static void check_gbsa_params_charged(gmx_mtop_t *sys, gpp_atomtype_t atype)
-{
-    int            i, nmiss, natoms, mt;
-    real           q;
-    const t_atoms *atoms;
-
-    nmiss = 0;
-    for (mt = 0; mt < sys->nmoltype; mt++)
-    {
-        atoms  = &sys->moltype[mt].atoms;
-        natoms = atoms->nr;
-
-        for (i = 0; i < natoms; i++)
-        {
-            q = atoms->atom[i].q;
-            if ((get_atomtype_radius(atoms->atom[i].type, atype)    == 0  ||
-                 get_atomtype_vol(atoms->atom[i].type, atype)       == 0  ||
-                 get_atomtype_surftens(atoms->atom[i].type, atype)  == 0  ||
-                 get_atomtype_gb_radius(atoms->atom[i].type, atype) == 0  ||
-                 get_atomtype_S_hct(atoms->atom[i].type, atype)     == 0) &&
-                q != 0)
-            {
-                fprintf(stderr, "\nGB parameter(s) zero for atom type '%s' while charge is %g\n",
-                        get_atomtype_name(atoms->atom[i].type, atype), q);
-                nmiss++;
-            }
-        }
-    }
-
-    if (nmiss > 0)
-    {
-        gmx_fatal(FARGS, "Can't do GB electrostatics; the implicit_genborn_params section of the forcefield has parameters with value zero for %d atomtypes that occur as charged atoms.", nmiss);
-    }
-}
-
-
-static void check_gbsa_params(gpp_atomtype_t atype)
-{
-    int  nmiss, i;
-
-    /* If we are doing GBSA, check that we got the parameters we need
-     * This checking is to see if there are GBSA paratmeters for all
-     * atoms in the force field. To go around this for testing purposes
-     * comment out the nerror++ counter temporarily
-     */
-    nmiss = 0;
-    for (i = 0; i < get_atomtype_ntypes(atype); i++)
-    {
-        if (get_atomtype_radius(i, atype)    < 0 ||
-            get_atomtype_vol(i, atype)       < 0 ||
-            get_atomtype_surftens(i, atype)  < 0 ||
-            get_atomtype_gb_radius(i, atype) < 0 ||
-            get_atomtype_S_hct(i, atype)     < 0)
-        {
-            fprintf(stderr, "\nGB parameter(s) missing or negative for atom type '%s'\n",
-                    get_atomtype_name(i, atype));
-            nmiss++;
-        }
-    }
-
-    if (nmiss > 0)
-    {
-        gmx_fatal(FARGS, "Can't do GB electrostatics; the implicit_genborn_params section of the forcefield is missing parameters for %d atomtypes or they might be negative.", nmiss);
-    }
-
-}
-
 static real calc_temp(const gmx_mtop_t *mtop,
                       const t_inputrec *ir,
                       rvec             *v)
@@ -2061,15 +1986,9 @@ int gmx_grompp(int argc, char *argv[])
         get_atomtype_ntypes(atype);
     }
 
-    if (ir->implicit_solvent != eisNO)
+    if (ir->implicit_solvent)
     {
-        /* Now we have renumbered the atom types, we can check the GBSA params */
-        check_gbsa_params(atype);
-
-        /* Check that all atoms that have charge and/or LJ-parameters also have
-         * sensible GB-parameters
-         */
-        check_gbsa_params_charged(sys, atype);
+        gmx_fatal(FARGS, "Implicit solvation is no longer supported");
     }
 
     /* PELA: Copy the atomtype data to the topology atomtype list */
diff --git a/src/gromacs/gmxpreprocess/nm2type.cpp b/src/gromacs/gmxpreprocess/nm2type.cpp
index 25f770f7f0..3b7d145a8c 100644
--- a/src/gromacs/gmxpreprocess/nm2type.cpp
+++ b/src/gromacs/gmxpreprocess/nm2type.cpp
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2008, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -343,7 +343,7 @@ int nm2type(int nnm, t_nm2type nm2t[], struct t_symtab *tab, t_atoms *atoms,
                 atoms->atom[i].qB = alpha;
                 atoms->atom[i].m  = atoms->atom[i].mB = mm;
                 k                 = add_atomtype(atype, tab, &(atoms->atom[i]), type, param,
-                                                 atoms->atom[i].type, 0, 0, 0, atomnr, 0, 0);
+                                                 atoms->atom[i].type, atomnr);
             }
             atoms->atom[i].type  = k;
             atoms->atom[i].typeB = k;
diff --git a/src/gromacs/gmxpreprocess/readir.cpp b/src/gromacs/gmxpreprocess/readir.cpp
index eab0056b10..8286170b7d 100644
--- a/src/gromacs/gmxpreprocess/readir.cpp
+++ b/src/gromacs/gmxpreprocess/readir.cpp
@@ -370,11 +370,6 @@ void check_ir(const char *mdparin, t_inputrec *ir, t_gromppopts *opts,
             warning_error(wi, warn_buf);
         }
 
-        if (ir->implicit_solvent != eisNO)
-        {
-            warning_error(wi, "Implicit solvent is not (yet) supported with the with Verlet lists.");
-        }
-
         if (EEL_USER(ir->coulombtype))
         {
             sprintf(warn_buf, "Coulomb type %s is not supported with the verlet scheme", eel_names[ir->coulombtype]);
@@ -1050,12 +1045,6 @@ void check_ir(const char *mdparin, t_inputrec *ir, t_gromppopts *opts,
         warning(wi, warn_buf);
     }
 
-    if (ir->epsilon_r != 1 && ir->implicit_solvent == eisGBSA)
-    {
-        sprintf(warn_buf, "epsilon-r = %g with GB implicit solvent, will use this value for inner dielectric", ir->epsilon_r);
-        warning_note(wi, warn_buf);
-    }
-
     if (EEL_RF(ir->coulombtype) && ir->epsilon_rf == 1 && ir->epsilon_r != 1)
     {
         sprintf(warn_buf, "epsilon-r = %g and epsilon-rf = 1 with reaction field, proceeding assuming old format and exchanging epsilon-r and epsilon-rf", ir->epsilon_r);
@@ -1067,9 +1056,9 @@ void check_ir(const char *mdparin, t_inputrec *ir, t_gromppopts *opts,
     if (ir->epsilon_r == 0)
     {
         sprintf(err_buf,
-                "It is pointless to use long-range or Generalized Born electrostatics with infinite relative permittivity."
+                "It is pointless to use long-range electrostatics with infinite relative permittivity."
                 "Since you are effectively turning of electrostatics, a plain cutoff will be much faster.");
-        CHECK(EEL_FULL(ir->coulombtype) || ir->implicit_solvent == eisGBSA);
+        CHECK(EEL_FULL(ir->coulombtype));
     }
 
     if (getenv("GMX_DO_GALACTIC_DYNAMICS") == nullptr)
@@ -1333,60 +1322,6 @@ void check_ir(const char *mdparin, t_inputrec *ir, t_gromppopts *opts,
         warning_error(wi, warn_buf);
     }
 
-    if (ir->sa_algorithm == esaSTILL)
-    {
-        sprintf(err_buf, "Still SA algorithm not available yet, use %s or %s instead\n", esa_names[esaAPPROX], esa_names[esaNO]);
-        CHECK(ir->sa_algorithm == esaSTILL);
-    }
-
-    if (ir->implicit_solvent == eisGBSA)
-    {
-        sprintf(err_buf, "With GBSA implicit solvent, rgbradii must be equal to rlist.");
-        CHECK(ir->rgbradii != ir->rlist);
-
-        if (ir->coulombtype != eelCUT)
-        {
-            sprintf(err_buf, "With GBSA, coulombtype must be equal to %s\n", eel_names[eelCUT]);
-            CHECK(ir->coulombtype != eelCUT);
-        }
-        if (ir->vdwtype != evdwCUT)
-        {
-            sprintf(err_buf, "With GBSA, vdw-type must be equal to %s\n", evdw_names[evdwCUT]);
-            CHECK(ir->vdwtype != evdwCUT);
-        }
-        if (ir->nstgbradii < 1)
-        {
-            sprintf(warn_buf, "Using GBSA with nstgbradii<1, setting nstgbradii=1");
-            warning_note(wi, warn_buf);
-            ir->nstgbradii = 1;
-        }
-        if (ir->sa_algorithm == esaNO)
-        {
-            sprintf(warn_buf, "No SA (non-polar) calculation requested together with GB. Are you sure this is what you want?\n");
-            warning_note(wi, warn_buf);
-        }
-        if (ir->sa_surface_tension < 0 && ir->sa_algorithm != esaNO)
-        {
-            sprintf(warn_buf, "Value of sa_surface_tension is < 0. Changing it to 2.05016 or 2.25936 kJ/nm^2/mol for Still and HCT/OBC respectively\n");
-            warning_note(wi, warn_buf);
-
-            if (ir->gb_algorithm == egbSTILL)
-            {
-                ir->sa_surface_tension = 0.0049 * CAL2JOULE * 100;
-            }
-            else
-            {
-                ir->sa_surface_tension = 0.0054 * CAL2JOULE * 100;
-            }
-        }
-        if (ir->sa_surface_tension == 0 && ir->sa_algorithm != esaNO)
-        {
-            sprintf(err_buf, "Surface tension set to 0 while SA-calculation requested\n");
-            CHECK(ir->sa_surface_tension == 0 && ir->sa_algorithm != esaNO);
-        }
-
-    }
-
     if (ir->bQMMM)
     {
         if (ir->cutoff_scheme != ecutsGROUP)
@@ -1834,6 +1769,8 @@ void get_ir(const char *mdparin, const char *mdparout,
     t_lambda   *fep    = ir->fepvals;
     t_expanded *expand = ir->expandedvals;
 
+    const char *no_names[] = { "no", nullptr };
+
     init_inputrec_strings();
     gmx::TextInputFile stream(mdparin);
     inp = read_inpfile(&stream, mdparin, &ninp, wi);
@@ -1878,6 +1815,17 @@ void get_ir(const char *mdparin, const char *mdparout,
     REM_TYPE("rlistlong");
     REM_TYPE("nstcalclr");
     REM_TYPE("pull-print-com2");
+    REM_TYPE("gb-algorithm");
+    REM_TYPE("nstgbradii");
+    REM_TYPE("rgbradii");
+    REM_TYPE("gb-epsilon-solvent");
+    REM_TYPE("gb-saltconc");
+    REM_TYPE("gb-obc-alpha");
+    REM_TYPE("gb-obc-beta");
+    REM_TYPE("gb-obc-gamma");
+    REM_TYPE("gb-dielectric-offset");
+    REM_TYPE("sa-algorithm");
+    REM_TYPE("sa-surface-tension");
 
     /* replace the following commands with the clearer new versions*/
     REPL_TYPE("unconstrained-start", "continuation");
@@ -2008,30 +1956,10 @@ void get_ir(const char *mdparin, const char *mdparout,
     EETYPE("ewald-geometry", ir->ewald_geometry, eewg_names);
     RTYPE ("epsilon-surface", ir->epsilon_surface, 0.0);
 
-    CCTYPE("IMPLICIT SOLVENT ALGORITHM");
-    EETYPE("implicit-solvent", ir->implicit_solvent, eis_names);
-
-    CCTYPE ("GENERALIZED BORN ELECTROSTATICS");
-    CTYPE ("Algorithm for calculating Born radii");
-    EETYPE("gb-algorithm", ir->gb_algorithm, egb_names);
-    CTYPE ("Frequency of calculating the Born radii inside rlist");
-    ITYPE ("nstgbradii", ir->nstgbradii, 1);
-    CTYPE ("Cutoff for Born radii calculation; the contribution from atoms");
-    CTYPE ("between rlist and rgbradii is updated every nstlist steps");
-    RTYPE ("rgbradii",  ir->rgbradii, 1.0);
-    CTYPE ("Dielectric coefficient of the implicit solvent");
-    RTYPE ("gb-epsilon-solvent", ir->gb_epsilon_solvent, 80.0);
-    CTYPE ("Salt concentration in M for Generalized Born models");
-    RTYPE ("gb-saltconc",  ir->gb_saltconc, 0.0);
-    CTYPE ("Scaling factors used in the OBC GB model. Default values are OBC(II)");
-    RTYPE ("gb-obc-alpha", ir->gb_obc_alpha, 1.0);
-    RTYPE ("gb-obc-beta", ir->gb_obc_beta, 0.8);
-    RTYPE ("gb-obc-gamma", ir->gb_obc_gamma, 4.85);
-    RTYPE ("gb-dielectric-offset", ir->gb_dielectric_offset, 0.009);
-    EETYPE("sa-algorithm", ir->sa_algorithm, esa_names);
-    CTYPE ("Surface tension (kJ/mol/nm^2) for the SA (nonpolar surface) part of GBSA");
-    CTYPE ("The value -1 will set default value for Still/HCT/OBC GB-models.");
-    RTYPE ("sa-surface-tension", ir->sa_surface_tension, -1);
+    /* Implicit solvation is no longer supported, but grompp must still
+       recognize this field so that it can reject old .mdp files that
+       request it. Thus, only "no" is accepted. */
+    EETYPE("implicit-solvent", ir->implicit_solvent, no_names);
 
     /* Coupling stuff */
     CCTYPE ("OPTIONS FOR WEAK COUPLING ALGORITHMS");
@@ -2362,8 +2290,9 @@ void get_ir(const char *mdparin, const char *mdparout,
         RTYPE("threshold", ir->swap->threshold, 1.0);
     }
 
-    /* AdResS is no longer supported, but we need mdrun to be able to refuse to run old AdResS .tpr files */
-    EETYPE("adress", ir->bAdress, yesno_names);
+    /* AdResS is no longer supported, but we need grompp to be able to
+       refuse to process old .mdp files that used it. */
+    EETYPE("adress", ir->bAdress, no_names);
 
     /* User defined thingies */
     CCTYPE ("User defined thingies");
@@ -4228,7 +4157,7 @@ void triple_check(const char *mdparin, t_inputrec *ir, gmx_mtop_t *sys,
     }
     else
     {
-        if (ir->coulombtype == eelCUT && ir->rcoulomb > 0 && !ir->implicit_solvent)
+        if (ir->coulombtype == eelCUT && ir->rcoulomb > 0)
         {
             sprintf(err_buf,
                     "You are using a plain Coulomb cut-off, which might produce artifacts.\n"
diff --git a/src/gromacs/gmxpreprocess/resall.cpp b/src/gromacs/gmxpreprocess/resall.cpp
index 689eae682f..4304dbda00 100644
--- a/src/gromacs/gmxpreprocess/resall.cpp
+++ b/src/gromacs/gmxpreprocess/resall.cpp
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -90,7 +90,7 @@ gpp_atomtype_t read_atype(const char *ffdir, t_symtab *tab)
             if (sscanf(buf, "%s%lf", name, &m) == 2)
             {
                 a->m = m;
-                add_atomtype(at, tab, a, name, nb, 0, 0.0, 0.0, 0.0, 0, 0.0, 0.0 );
+                add_atomtype(at, tab, a, name, nb, 0, 0);
                 fprintf(stderr, "\rAtomtype %d", ++nratt);
                 fflush(stderr);
             }
diff --git a/src/gromacs/gmxpreprocess/tests/readir.cpp b/src/gromacs/gmxpreprocess/tests/readir.cpp
index 6fd6f7d274..6500dd0beb 100644
--- a/src/gromacs/gmxpreprocess/tests/readir.cpp
+++ b/src/gromacs/gmxpreprocess/tests/readir.cpp
@@ -189,5 +189,17 @@ TEST_F(GetIrTest, TerminatesOnDuplicateOldAndNewKeys)
     EXPECT_DEATH(runTest(joinStrings(inputMdpFile, "\n")), "A parameter is present with both");
 }
 
+TEST_F(GetIrTest, ImplicitSolventNoWorks)
+{
+    const char *inputMdpFile = "implicit-solvent = no";
+    runTest(inputMdpFile);
+}
+
+TEST_F(GetIrTest, ImplicitSolventYesWorks)
+{
+    const char *inputMdpFile = "implicit-solvent = yes";
+    EXPECT_DEATH(runTest(inputMdpFile), "Invalid enum");
+}
+
 } // namespace
 } // namespace
diff --git a/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_EmptyInputWorks.xml b/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_EmptyInputWorks.xml
index 8d2b356f90..c0edc1f5e2 100644
--- a/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_EmptyInputWorks.xml
+++ b/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_EmptyInputWorks.xml
@@ -117,31 +117,7 @@ ewald-rtol-lj            = 0.001
 lj-pme-comb-rule         = Geometric
 ewald-geometry           = 3d
 epsilon-surface          = 0
-
-; IMPLICIT SOLVENT ALGORITHM
-implicit-solvent         = No
-
-; GENERALIZED BORN ELECTROSTATICS
-; Algorithm for calculating Born radii
-gb-algorithm             = Still
-; Frequency of calculating the Born radii inside rlist
-nstgbradii               = 1
-; Cutoff for Born radii calculation; the contribution from atoms
-; between rlist and rgbradii is updated every nstlist steps
-rgbradii                 = 1
-; Dielectric coefficient of the implicit solvent
-gb-epsilon-solvent       = 80
-; Salt concentration in M for Generalized Born models
-gb-saltconc              = 0
-; Scaling factors used in the OBC GB model. Default values are OBC(II)
-gb-obc-alpha             = 1
-gb-obc-beta              = 0.8
-gb-obc-gamma             = 4.85
-gb-dielectric-offset     = 0.009
-sa-algorithm             = Ace-approximation
-; Surface tension (kJ/mol/nm^2) for the SA (nonpolar surface) part of GBSA
-; The value -1 will set default value for Still/HCT/OBC GB-models.
-sa-surface-tension       = -1
+implicit-solvent         = no
 
 ; OPTIONS FOR WEAK COUPLING ALGORITHMS
 ; Temperature coupling  
diff --git a/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_HandlesDifferentKindsOfMdpLines.xml b/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_HandlesDifferentKindsOfMdpLines.xml
index 27684c38d6..538864d80b 100644
--- a/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_HandlesDifferentKindsOfMdpLines.xml
+++ b/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_HandlesDifferentKindsOfMdpLines.xml
@@ -117,31 +117,7 @@ ewald-rtol-lj            = 0.001
 lj-pme-comb-rule         = Geometric
 ewald-geometry           = 3d
 epsilon-surface          = 0
-
-; IMPLICIT SOLVENT ALGORITHM
-implicit-solvent         = No
-
-; GENERALIZED BORN ELECTROSTATICS
-; Algorithm for calculating Born radii
-gb-algorithm             = Still
-; Frequency of calculating the Born radii inside rlist
-nstgbradii               = 1
-; Cutoff for Born radii calculation; the contribution from atoms
-; between rlist and rgbradii is updated every nstlist steps
-rgbradii                 = 1
-; Dielectric coefficient of the implicit solvent
-gb-epsilon-solvent       = 80
-; Salt concentration in M for Generalized Born models
-gb-saltconc              = 0
-; Scaling factors used in the OBC GB model. Default values are OBC(II)
-gb-obc-alpha             = 1
-gb-obc-beta              = 0.8
-gb-obc-gamma             = 4.85
-gb-dielectric-offset     = 0.009
-sa-algorithm             = Ace-approximation
-; Surface tension (kJ/mol/nm^2) for the SA (nonpolar surface) part of GBSA
-; The value -1 will set default value for Still/HCT/OBC GB-models.
-sa-surface-tension       = -1
+implicit-solvent         = no
 
 ; OPTIONS FOR WEAK COUPLING ALGORITHMS
 ; Temperature coupling  
diff --git a/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_HandlesOnlyCutoffScheme.xml b/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_HandlesOnlyCutoffScheme.xml
index 0d8780d26b..cc6a71bdc6 100644
--- a/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_HandlesOnlyCutoffScheme.xml
+++ b/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_HandlesOnlyCutoffScheme.xml
@@ -117,31 +117,7 @@ ewald-rtol-lj            = 0.001
 lj-pme-comb-rule         = Geometric
 ewald-geometry           = 3d
 epsilon-surface          = 0
-
-; IMPLICIT SOLVENT ALGORITHM
-implicit-solvent         = No
-
-; GENERALIZED BORN ELECTROSTATICS
-; Algorithm for calculating Born radii
-gb-algorithm             = Still
-; Frequency of calculating the Born radii inside rlist
-nstgbradii               = 1
-; Cutoff for Born radii calculation; the contribution from atoms
-; between rlist and rgbradii is updated every nstlist steps
-rgbradii                 = 1
-; Dielectric coefficient of the implicit solvent
-gb-epsilon-solvent       = 80
-; Salt concentration in M for Generalized Born models
-gb-saltconc              = 0
-; Scaling factors used in the OBC GB model. Default values are OBC(II)
-gb-obc-alpha             = 1
-gb-obc-beta              = 0.8
-gb-obc-gamma             = 4.85
-gb-dielectric-offset     = 0.009
-sa-algorithm             = Ace-approximation
-; Surface tension (kJ/mol/nm^2) for the SA (nonpolar surface) part of GBSA
-; The value -1 will set default value for Still/HCT/OBC GB-models.
-sa-surface-tension       = -1
+implicit-solvent         = no
 
 ; OPTIONS FOR WEAK COUPLING ALGORITHMS
 ; Temperature coupling  
diff --git a/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_ImplicitSolventNoWorks.xml b/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_ImplicitSolventNoWorks.xml
new file mode 100644
index 0000000000..c0edc1f5e2
--- /dev/null
+++ b/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_ImplicitSolventNoWorks.xml
@@ -0,0 +1,321 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+  <Bool Name="Error parsing mdp file">false</Bool>
+  <String Name="OutputMdpFile">
+; VARIOUS PREPROCESSING OPTIONS
+; Preprocessor information: use cpp syntax.
+; e.g.: -I/home/joe/doe -I/home/mary/roe
+include                  = 
+; e.g.: -DPOSRES -DFLEXIBLE (note these variable names are case sensitive)
+define                   = 
+
+; RUN CONTROL PARAMETERS
+integrator               = md
+; Start time and timestep in ps
+tinit                    = 0
+dt                       = 0.001
+nsteps                   = 0
+; For exact run continuation or redoing part of a run
+init-step                = 0
+; Part index is updated automatically on checkpointing (keeps files separate)
+simulation-part          = 1
+; mode for center of mass motion removal
+comm-mode                = Linear
+; number of steps for center of mass motion removal
+nstcomm                  = 100
+; group(s) for center of mass motion removal
+comm-grps                = 
+
+; LANGEVIN DYNAMICS OPTIONS
+; Friction coefficient (amu/ps) and random seed
+bd-fric                  = 0
+ld-seed                  = -1
+
+; ENERGY MINIMIZATION OPTIONS
+; Force tolerance and initial step-size
+emtol                    = 10
+emstep                   = 0.01
+; Max number of iterations in relax-shells
+niter                    = 20
+; Step size (ps^2) for minimization of flexible constraints
+fcstep                   = 0
+; Frequency of steepest descents steps when doing CG
+nstcgsteep               = 1000
+nbfgscorr                = 10
+
+; TEST PARTICLE INSERTION OPTIONS
+rtpi                     = 0.05
+
+; OUTPUT CONTROL OPTIONS
+; Output frequency for coords (x), velocities (v) and forces (f)
+nstxout                  = 0
+nstvout                  = 0
+nstfout                  = 0
+; Output frequency for energies to log file and energy file
+nstlog                   = 1000
+nstcalcenergy            = 100
+nstenergy                = 1000
+; Output frequency and precision for .xtc file
+nstxout-compressed       = 0
+compressed-x-precision   = 1000
+; This selects the subset of atoms for the compressed
+; trajectory file. You can select multiple groups. By
+; default, all atoms will be written.
+compressed-x-grps        = 
+; Selection of energy groups
+energygrps               = 
+
+; NEIGHBORSEARCHING PARAMETERS
+; cut-off scheme (Verlet: particle based cut-offs, group: using charge groups)
+cutoff-scheme            = Verlet
+; nblist update frequency
+nstlist                  = 10
+; ns algorithm (simple or grid)
+ns-type                  = Grid
+; Periodic boundary conditions: xyz, no, xy
+pbc                      = xyz
+periodic-molecules       = no
+; Allowed energy error due to the Verlet buffer in kJ/mol/ps per atom,
+; a value of -1 means: use rlist
+verlet-buffer-tolerance  = 0.005
+; nblist cut-off        
+rlist                    = 1
+; long-range cut-off for switched potentials
+
+; OPTIONS FOR ELECTROSTATICS AND VDW
+; Method for doing electrostatics
+coulombtype              = Cut-off
+coulomb-modifier         = Potential-shift-Verlet
+rcoulomb-switch          = 0
+rcoulomb                 = 1
+; Relative dielectric constant for the medium and the reaction field
+epsilon-r                = 1
+epsilon-rf               = 0
+; Method for doing Van der Waals
+vdw-type                 = Cut-off
+vdw-modifier             = Potential-shift-Verlet
+; cut-off lengths       
+rvdw-switch              = 0
+rvdw                     = 1
+; Apply long range dispersion corrections for Energy and Pressure
+DispCorr                 = No
+; Extension of the potential lookup tables beyond the cut-off
+table-extension          = 1
+; Separate tables between energy group pairs
+energygrp-table          = 
+; Spacing for the PME/PPPM FFT grid
+fourierspacing           = 0.12
+; FFT grid size, when a value is 0 fourierspacing will be used
+fourier-nx               = 0
+fourier-ny               = 0
+fourier-nz               = 0
+; EWALD/PME/PPPM parameters
+pme-order                = 4
+ewald-rtol               = 1e-05
+ewald-rtol-lj            = 0.001
+lj-pme-comb-rule         = Geometric
+ewald-geometry           = 3d
+epsilon-surface          = 0
+implicit-solvent         = no
+
+; OPTIONS FOR WEAK COUPLING ALGORITHMS
+; Temperature coupling  
+tcoupl                   = No
+nsttcouple               = -1
+nh-chain-length          = 10
+print-nose-hoover-chain-variables = no
+; Groups to couple separately
+tc-grps                  = 
+; Time constant (ps) and reference temperature (K)
+tau-t                    = 
+ref-t                    = 
+; pressure coupling     
+pcoupl                   = No
+pcoupltype               = Isotropic
+nstpcouple               = -1
+; Time constant (ps), compressibility (1/bar) and reference P (bar)
+tau-p                    = 1
+compressibility          = 
+ref-p                    = 
+; Scaling of reference coordinates, No, All or COM
+refcoord-scaling         = No
+
+; OPTIONS FOR QMMM calculations
+QMMM                     = no
+; Groups treated Quantum Mechanically
+QMMM-grps                = 
+; QM method             
+QMmethod                 = 
+; QMMM scheme           
+QMMMscheme               = normal
+; QM basisset           
+QMbasis                  = 
+; QM charge             
+QMcharge                 = 
+; QM multiplicity       
+QMmult                   = 
+; Surface Hopping       
+SH                       = 
+; CAS space options     
+CASorbitals              = 
+CASelectrons             = 
+SAon                     = 
+SAoff                    = 
+SAsteps                  = 
+; Scale factor for MM charges
+MMChargeScaleFactor      = 1
+
+; SIMULATED ANNEALING  
+; Type of annealing for each temperature group (no/single/periodic)
+annealing                = 
+; Number of time points to use for specifying annealing in each group
+annealing-npoints        = 
+; List of times at the annealing points for each group
+annealing-time           = 
+; Temp. at each annealing point, for each group.
+annealing-temp           = 
+
+; GENERATE VELOCITIES FOR STARTUP RUN
+gen-vel                  = no
+gen-temp                 = 300
+gen-seed                 = -1
+
+; OPTIONS FOR BONDS    
+constraints              = none
+; Type of constraint algorithm
+constraint-algorithm     = Lincs
+; Do not constrain the start configuration
+continuation             = no
+; Use successive overrelaxation to reduce the number of shake iterations
+Shake-SOR                = no
+; Relative tolerance of shake
+shake-tol                = 0.0001
+; Highest order in the expansion of the constraint coupling matrix
+lincs-order              = 4
+; Number of iterations in the final step of LINCS. 1 is fine for
+; normal simulations, but use 2 to conserve energy in NVE runs.
+; For energy minimization with constraints it should be 4 to 8.
+lincs-iter               = 1
+; Lincs will write a warning to the stderr if in one step a bond
+; rotates over more degrees than
+lincs-warnangle          = 30
+; Convert harmonic bonds to morse potentials
+morse                    = no
+
+; ENERGY GROUP EXCLUSIONS
+; Pairs of energy groups for which all non-bonded interactions are excluded
+energygrp-excl           = 
+
+; WALLS                
+; Number of walls, type, atom types, densities and box-z scale factor for Ewald
+nwall                    = 0
+wall-type                = 9-3
+wall-r-linpot            = -1
+wall-atomtype            = 
+wall-density             = 
+wall-ewald-zfac          = 3
+
+; COM PULLING          
+pull                     = no
+
+; AWH biasing          
+awh                      = no
+
+; ENFORCED ROTATION    
+; Enforced rotation: No or Yes
+rotation                 = no
+
+; Group to display and/or manipulate in interactive MD session
+IMD-group                = 
+
+; NMR refinement stuff 
+; Distance restraints type: No, Simple or Ensemble
+disre                    = No
+; Force weighting of pairs in one distance restraint: Conservative or Equal
+disre-weighting          = Conservative
+; Use sqrt of the time averaged times the instantaneous violation
+disre-mixed              = no
+disre-fc                 = 1000
+disre-tau                = 0
+; Output frequency for pair distances to energy file
+nstdisreout              = 100
+; Orientation restraints: No or Yes
+orire                    = no
+; Orientation restraints force constant and tau for time averaging
+orire-fc                 = 0
+orire-tau                = 0
+orire-fitgrp             = 
+; Output frequency for trace(SD) and S to energy file
+nstorireout              = 100
+
+; Free energy variables
+free-energy              = no
+couple-moltype           = 
+couple-lambda0           = vdw-q
+couple-lambda1           = vdw-q
+couple-intramol          = no
+init-lambda              = -1
+init-lambda-state        = -1
+delta-lambda             = 0
+nstdhdl                  = 50
+fep-lambdas              = 
+mass-lambdas             = 
+coul-lambdas             = 
+vdw-lambdas              = 
+bonded-lambdas           = 
+restraint-lambdas        = 
+temperature-lambdas      = 
+calc-lambda-neighbors    = 1
+init-lambda-weights      = 
+dhdl-print-energy        = no
+sc-alpha                 = 0
+sc-power                 = 1
+sc-r-power               = 6
+sc-sigma                 = 0.3
+sc-coul                  = no
+separate-dhdl-file       = yes
+dhdl-derivatives         = yes
+dh_hist_size             = 0
+dh_hist_spacing          = 0.1
+
+; Non-equilibrium MD stuff
+acc-grps                 = 
+accelerate               = 
+freezegrps               = 
+freezedim                = 
+cos-acceleration         = 0
+deform                   = 
+
+; simulated tempering variables
+simulated-tempering      = no
+simulated-tempering-scaling = geometric
+sim-temp-low             = 300
+sim-temp-high            = 300
+
+; Ion/water position swapping for computational electrophysiology setups
+; Swap positions along direction: no, X, Y, Z
+swapcoords               = no
+adress                   = no
+
+; User defined thingies
+user1-grps               = 
+user2-grps               = 
+userint1                 = 0
+userint2                 = 0
+userint3                 = 0
+userint4                 = 0
+userreal1                = 0
+userreal2                = 0
+userreal3                = 0
+userreal4                = 0
+; Electric fields
+; Format for electric-field-x, etc. is: four real variables:
+; amplitude (V/nm), frequency omega (1/ps), time for the pulse peak (ps),
+; and sigma (ps) width of the pulse. Omega = 0 means static field,
+; sigma = 0 means no pulse, leaving the field to be a cosine function.
+electric-field-x         = 0 0 0 0
+electric-field-y         = 0 0 0 0
+electric-field-z         = 0 0 0 0
+</String>
+</ReferenceData>
diff --git a/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_ProducesOutputFromElectricField.xml b/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_ProducesOutputFromElectricField.xml
index 353f36905d..9f2bcabde7 100644
--- a/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_ProducesOutputFromElectricField.xml
+++ b/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_ProducesOutputFromElectricField.xml
@@ -117,31 +117,7 @@ ewald-rtol-lj            = 0.001
 lj-pme-comb-rule         = Geometric
 ewald-geometry           = 3d
 epsilon-surface          = 0
-
-; IMPLICIT SOLVENT ALGORITHM
-implicit-solvent         = No
-
-; GENERALIZED BORN ELECTROSTATICS
-; Algorithm for calculating Born radii
-gb-algorithm             = Still
-; Frequency of calculating the Born radii inside rlist
-nstgbradii               = 1
-; Cutoff for Born radii calculation; the contribution from atoms
-; between rlist and rgbradii is updated every nstlist steps
-rgbradii                 = 1
-; Dielectric coefficient of the implicit solvent
-gb-epsilon-solvent       = 80
-; Salt concentration in M for Generalized Born models
-gb-saltconc              = 0
-; Scaling factors used in the OBC GB model. Default values are OBC(II)
-gb-obc-alpha             = 1
-gb-obc-beta              = 0.8
-gb-obc-gamma             = 4.85
-gb-dielectric-offset     = 0.009
-sa-algorithm             = Ace-approximation
-; Surface tension (kJ/mol/nm^2) for the SA (nonpolar surface) part of GBSA
-; The value -1 will set default value for Still/HCT/OBC GB-models.
-sa-surface-tension       = -1
+implicit-solvent         = no
 
 ; OPTIONS FOR WEAK COUPLING ALGORITHMS
 ; Temperature coupling  
diff --git a/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_ProducesOutputFromElectricFieldOscillating.xml b/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_ProducesOutputFromElectricFieldOscillating.xml
index 538ebd3da9..8b52b14d80 100644
--- a/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_ProducesOutputFromElectricFieldOscillating.xml
+++ b/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_ProducesOutputFromElectricFieldOscillating.xml
@@ -117,31 +117,7 @@ ewald-rtol-lj            = 0.001
 lj-pme-comb-rule         = Geometric
 ewald-geometry           = 3d
 epsilon-surface          = 0
-
-; IMPLICIT SOLVENT ALGORITHM
-implicit-solvent         = No
-
-; GENERALIZED BORN ELECTROSTATICS
-; Algorithm for calculating Born radii
-gb-algorithm             = Still
-; Frequency of calculating the Born radii inside rlist
-nstgbradii               = 1
-; Cutoff for Born radii calculation; the contribution from atoms
-; between rlist and rgbradii is updated every nstlist steps
-rgbradii                 = 1
-; Dielectric coefficient of the implicit solvent
-gb-epsilon-solvent       = 80
-; Salt concentration in M for Generalized Born models
-gb-saltconc              = 0
-; Scaling factors used in the OBC GB model. Default values are OBC(II)
-gb-obc-alpha             = 1
-gb-obc-beta              = 0.8
-gb-obc-gamma             = 4.85
-gb-dielectric-offset     = 0.009
-sa-algorithm             = Ace-approximation
-; Surface tension (kJ/mol/nm^2) for the SA (nonpolar surface) part of GBSA
-; The value -1 will set default value for Still/HCT/OBC GB-models.
-sa-surface-tension       = -1
+implicit-solvent         = no
 
 ; OPTIONS FOR WEAK COUPLING ALGORITHMS
 ; Temperature coupling  
diff --git a/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_ProducesOutputFromElectricFieldPulsed.xml b/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_ProducesOutputFromElectricFieldPulsed.xml
index 0de50f50c7..4b2df698fa 100644
--- a/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_ProducesOutputFromElectricFieldPulsed.xml
+++ b/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_ProducesOutputFromElectricFieldPulsed.xml
@@ -117,31 +117,7 @@ ewald-rtol-lj            = 0.001
 lj-pme-comb-rule         = Geometric
 ewald-geometry           = 3d
 epsilon-surface          = 0
-
-; IMPLICIT SOLVENT ALGORITHM
-implicit-solvent         = No
-
-; GENERALIZED BORN ELECTROSTATICS
-; Algorithm for calculating Born radii
-gb-algorithm             = Still
-; Frequency of calculating the Born radii inside rlist
-nstgbradii               = 1
-; Cutoff for Born radii calculation; the contribution from atoms
-; between rlist and rgbradii is updated every nstlist steps
-rgbradii                 = 1
-; Dielectric coefficient of the implicit solvent
-gb-epsilon-solvent       = 80
-; Salt concentration in M for Generalized Born models
-gb-saltconc              = 0
-; Scaling factors used in the OBC GB model. Default values are OBC(II)
-gb-obc-alpha             = 1
-gb-obc-beta              = 0.8
-gb-obc-gamma             = 4.85
-gb-dielectric-offset     = 0.009
-sa-algorithm             = Ace-approximation
-; Surface tension (kJ/mol/nm^2) for the SA (nonpolar surface) part of GBSA
-; The value -1 will set default value for Still/HCT/OBC GB-models.
-sa-surface-tension       = -1
+implicit-solvent         = no
 
 ; OPTIONS FOR WEAK COUPLING ALGORITHMS
 ; Temperature coupling  
diff --git a/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_UserErrorsSilentlyTolerated.xml b/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_UserErrorsSilentlyTolerated.xml
index 8d2b356f90..c0edc1f5e2 100644
--- a/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_UserErrorsSilentlyTolerated.xml
+++ b/src/gromacs/gmxpreprocess/tests/refdata/GetIrTest_UserErrorsSilentlyTolerated.xml
@@ -117,31 +117,7 @@ ewald-rtol-lj            = 0.001
 lj-pme-comb-rule         = Geometric
 ewald-geometry           = 3d
 epsilon-surface          = 0
-
-; IMPLICIT SOLVENT ALGORITHM
-implicit-solvent         = No
-
-; GENERALIZED BORN ELECTROSTATICS
-; Algorithm for calculating Born radii
-gb-algorithm             = Still
-; Frequency of calculating the Born radii inside rlist
-nstgbradii               = 1
-; Cutoff for Born radii calculation; the contribution from atoms
-; between rlist and rgbradii is updated every nstlist steps
-rgbradii                 = 1
-; Dielectric coefficient of the implicit solvent
-gb-epsilon-solvent       = 80
-; Salt concentration in M for Generalized Born models
-gb-saltconc              = 0
-; Scaling factors used in the OBC GB model. Default values are OBC(II)
-gb-obc-alpha             = 1
-gb-obc-beta              = 0.8
-gb-obc-gamma             = 4.85
-gb-dielectric-offset     = 0.009
-sa-algorithm             = Ace-approximation
-; Surface tension (kJ/mol/nm^2) for the SA (nonpolar surface) part of GBSA
-; The value -1 will set default value for Still/HCT/OBC GB-models.
-sa-surface-tension       = -1
+implicit-solvent         = no
 
 ; OPTIONS FOR WEAK COUPLING ALGORITHMS
 ; Temperature coupling  
diff --git a/src/gromacs/gmxpreprocess/topdirs.cpp b/src/gromacs/gmxpreprocess/topdirs.cpp
index b955ba2d91..7844f89353 100644
--- a/src/gromacs/gmxpreprocess/topdirs.cpp
+++ b/src/gromacs/gmxpreprocess/topdirs.cpp
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -366,8 +366,6 @@ void DS_Init(DirStack **DS)
         set_nec(&(necessary[d_angletypes]), d_atomtypes, d_none);
         set_nec(&(necessary[d_dihedraltypes]), d_atomtypes, d_none);
         set_nec(&(necessary[d_nonbond_params]), d_atomtypes, d_none);
-        set_nec(&(necessary[d_implicit_genborn_params]), d_atomtypes, d_none);
-        set_nec(&(necessary[d_implicit_surface_params]), d_atomtypes, d_none);
         set_nec(&(necessary[d_cmaptypes]), d_atomtypes, d_none);
         set_nec(&(necessary[d_moleculetype]), d_atomtypes, d_none);
         set_nec(&(necessary[d_atoms]), d_moleculetype, d_none);
diff --git a/src/gromacs/gmxpreprocess/topio.cpp b/src/gromacs/gmxpreprocess/topio.cpp
index 905e27da4e..73e5abf422 100644
--- a/src/gromacs/gmxpreprocess/topio.cpp
+++ b/src/gromacs/gmxpreprocess/topio.cpp
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -65,7 +65,6 @@
 #include "gromacs/gmxpreprocess/vsite_parm.h"
 #include "gromacs/math/units.h"
 #include "gromacs/math/utilities.h"
-#include "gromacs/mdlib/genborn.h"
 #include "gromacs/mdtypes/inputrec.h"
 #include "gromacs/mdtypes/md_enums.h"
 #include "gromacs/pbcutil/pbc.h"
@@ -396,182 +395,6 @@ static char ** cpp_opts(const char *define, const char *include,
 }
 
 
-static int
-find_gb_bondlength(t_params *plist, int ai, int aj, real *length)
-{
-    int i, j, a1, a2;
-
-    int found = 0;
-    int status;
-
-    for (i = 0; i < F_NRE && !found; i++)
-    {
-        if (IS_CHEMBOND(i))
-        {
-            for (j = 0; j < plist[i].nr; j++)
-            {
-                a1 = plist[i].param[j].a[0];
-                a2 = plist[i].param[j].a[1];
-
-                if ( (a1 == ai && a2 == aj) || (a1 == aj && a2 == ai))
-                {
-                    /* Equilibrium bond distance */
-                    *length = plist[i].param[j].c[0];
-                    found   = 1;
-                }
-            }
-        }
-    }
-    status = !found;
-
-    return status;
-}
-
-
-static int
-find_gb_anglelength(t_params *plist, int ai, int ak, real *length)
-{
-    int  i, j, a1, a2, a3;
-    real r12, r23, a123;
-    int  found = 0;
-    int  status, status1, status2;
-
-    r12 = r23 = 0;
-
-    for (i = 0; i < F_NRE && !found; i++)
-    {
-        if (IS_ANGLE(i))
-        {
-            for (j = 0; j < plist[i].nr; j++)
-            {
-                a1 = plist[i].param[j].a[0];
-                a2 = plist[i].param[j].a[1];
-                a3 = plist[i].param[j].a[2];
-
-                /* We dont care what the middle atom is, but use it below */
-                if ( (a1 == ai && a3 == ak) || (a1 == ak && a3 == ai) )
-                {
-                    /* Equilibrium bond distance */
-                    a123 = plist[i].param[j].c[0];
-                    /* Use middle atom to find reference distances r12 and r23 */
-                    status1 = find_gb_bondlength(plist, a1, a2, &r12);
-                    status2 = find_gb_bondlength(plist, a2, a3, &r23);
-
-                    if (status1 == 0 && status2 == 0)
-                    {
-                        /* cosine theorem to get r13 */
-                        *length = std::sqrt(r12*r12+r23*r23-(2*r12*r23*cos(a123/RAD2DEG)));
-                        found   = 1;
-                    }
-                }
-            }
-        }
-    }
-    status = !found;
-
-    return status;
-}
-
-static int
-generate_gb_exclusion_interactions(t_molinfo *mi, gpp_atomtype_t atype, t_nextnb *nnb)
-{
-    int          j, n, ai, aj, ti, tj;
-    int          ftype;
-    t_param      param;
-    t_params *   plist;
-    t_atoms *    at;
-    real         radiusi, radiusj;
-    real         gb_radiusi, gb_radiusj;
-    real         param_c2, param_c4;
-    real         distance;
-
-    plist = mi->plist;
-    at    = &mi->atoms;
-
-    for (n = 1; n <= nnb->nrex; n++)
-    {
-        switch (n)
-        {
-            case 1:
-                ftype    = F_GB12;
-                param_c2 = STILL_P2;
-                param_c4 = 0.8875;
-                break;
-            case 2:
-                ftype    = F_GB13;
-                param_c2 = STILL_P3;
-                param_c4 = 0.3516;
-                break;
-            default:
-                /* Put all higher-order exclusions into 1,4 list so we dont miss them */
-                ftype    = F_GB14;
-                param_c2 = STILL_P3;
-                param_c4 = 0.3516;
-                break;
-        }
-
-        for (ai = 0; ai < nnb->nr; ai++)
-        {
-            ti         = at->atom[ai].type;
-            radiusi    = get_atomtype_radius(ti, atype);
-            gb_radiusi = get_atomtype_gb_radius(ti, atype);
-
-            for (j = 0; j < nnb->nrexcl[ai][n]; j++)
-            {
-                aj = nnb->a[ai][n][j];
-
-                /* Only add the interactions once */
-                if (aj > ai)
-                {
-                    tj         = at->atom[aj].type;
-                    radiusj    = get_atomtype_radius(tj, atype);
-                    gb_radiusj = get_atomtype_gb_radius(tj, atype);
-
-                    /* There is an exclusion of type "ftype" between atoms ai and aj */
-                    param.a[0] = ai;
-                    param.a[1] = aj;
-
-                    /* Reference distance, not used for 1-4 interactions */
-                    switch (ftype)
-                    {
-                        case F_GB12:
-                            if (find_gb_bondlength(plist, ai, aj, &distance) != 0)
-                            {
-                                gmx_fatal(FARGS, "Cannot find bond length for atoms %d-%d", ai, aj);
-                            }
-                            break;
-                        case F_GB13:
-                            if (find_gb_anglelength(plist, ai, aj, &distance) != 0)
-                            {
-                                gmx_fatal(FARGS, "Cannot find length for atoms %d-%d involved in angle", ai, aj);
-                            }
-                            break;
-                        default:
-                            distance = -1;
-                            break;
-                    }
-                    /* Assign GB parameters */
-                    /* Sum of radii */
-                    param.c[0] = radiusi+radiusj;
-                    /* Reference distance distance */
-                    param.c[1] = distance;
-                    /* Still parameter */
-                    param.c[2] = param_c2;
-                    /* GB radius */
-                    param.c[3] = gb_radiusi+gb_radiusj;
-                    /* Parameter */
-                    param.c[4] = param_c4;
-
-                    /* Add it to the parameter list */
-                    add_param_to_list(&plist[ftype], &param);
-                }
-            }
-        }
-    }
-    return 0;
-}
-
-
 static void make_atoms_sys(int nmolb, const gmx_molblock_t *molb,
                            const t_molinfo *molinfo,
                            t_atoms *atoms)
@@ -615,7 +438,6 @@ static char **read_topol(const char *infile, const char *outfile,
                          int         *nmolblock,
                          gmx_molblock_t **molblock,
                          gmx_bool        bFEP,
-                         gmx_bool        bGenborn,
                          gmx_bool        bZero,
                          gmx_bool        usingFullRangeElectrostatics,
                          warninp_t       wi)
@@ -917,11 +739,15 @@ static char **read_topol(const char *infile, const char *outfile,
                          */
 
                         case d_implicit_genborn_params:
-                            push_gb_params(atype, pline, wi);
+                            // Skip this line, so old topologies with
+                            // GB parameters can be read.
                             break;
 
                         case d_implicit_surface_params:
-                            gmx_fatal(FARGS, "Implicit surface directive not supported yet.");
+                            // Skip this line, so that any topologies
+                            // with surface parameters can be read
+                            // (even though these were never formally
+                            // supported).
                             break;
 
                         case d_cmaptypes:
@@ -1068,14 +894,6 @@ static char **read_topol(const char *infile, const char *outfile,
 
 
 
-                                /* nnb contains information about first,2nd,3rd bonded neighbors.
-                                 * Use this to generate GB 1-2,1-3,1-4 interactions when necessary.
-                                 */
-                                if (bGenborn == TRUE)
-                                {
-                                    generate_gb_exclusion_interactions(mi0, atype, &nnb);
-                                }
-
                                 done_nnb(&nnb);
 
                                 if (bCouple)
@@ -1184,7 +1002,6 @@ char **do_top(gmx_bool          bVerbose,
               const t_inputrec *ir,
               int              *nmolblock,
               gmx_molblock_t  **molblock,
-              gmx_bool          bGenborn,
               warninp_t         wi)
 {
     /* Tmpfile might contain a long path */
@@ -1209,7 +1026,7 @@ char **do_top(gmx_bool          bVerbose,
                        nrmols, molinfo, intermolecular_interactions,
                        plist, combination_rule, repulsion_power,
                        opts, fudgeQQ, nmolblock, molblock,
-                       ir->efep != efepNO, bGenborn, bZero,
+                       ir->efep != efepNO, bZero,
                        EEL_FULL(ir->coulombtype), wi);
 
     if ((*combination_rule != eCOMB_GEOMETRIC) &&
diff --git a/src/gromacs/gmxpreprocess/topio.h b/src/gromacs/gmxpreprocess/topio.h
index 4023f8fceb..bb79551e62 100644
--- a/src/gromacs/gmxpreprocess/topio.h
+++ b/src/gromacs/gmxpreprocess/topio.h
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2012,2014,2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2012,2014,2015,2016,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -68,7 +68,6 @@ char **do_top(gmx_bool          bVerbose,
               const t_inputrec *ir,
               int              *nmolblock,
               gmx_molblock_t  **molblock,
-              gmx_bool          bGB,
               warninp_t         wi);
 
 /* This routine expects sys->molt[m].ilist to be of size F_NRE and ordered. */
diff --git a/src/gromacs/gmxpreprocess/toppush.cpp b/src/gromacs/gmxpreprocess/toppush.cpp
index 24381dcd06..c49ad80a76 100644
--- a/src/gromacs/gmxpreprocess/toppush.cpp
+++ b/src/gromacs/gmxpreprocess/toppush.cpp
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -238,7 +238,6 @@ void push_at (t_symtab *symtab, gpp_atomtype_t at, t_bond_atomtype bat,
     char       type[STRLEN], btype[STRLEN], ptype[STRLEN];
     double     m, q;
     double     c[MAXFORCEPARAM];
-    double     radius, vol, surftens, gb_radius, S_hct;
     char       tmpfield[12][100]; /* Max 12 fields of width 100 */
     char       errbuf[STRLEN];
     t_atom    *atom;
@@ -308,12 +307,7 @@ void push_at (t_symtab *symtab, gpp_atomtype_t at, t_bond_atomtype bat,
     }
 
     /* optional fields */
-    surftens  = -1;
-    vol       = -1;
-    radius    = -1;
-    gb_radius = -1;
     atomnr    = -1;
-    S_hct     = -1;
 
     switch (nb_funct)
     {
@@ -325,9 +319,8 @@ void push_at (t_symtab *symtab, gpp_atomtype_t at, t_bond_atomtype bat,
             {
                 if (have_bonded_type)
                 {
-                    nread = sscanf(line, "%s%s%d%lf%lf%s%lf%lf%lf%lf%lf%lf",
-                                   type, btype, &atomnr, &m, &q, ptype, &c[0], &c[1],
-                                   &radius, &vol, &surftens, &gb_radius);
+                    nread = sscanf(line, "%s%s%d%lf%lf%s%lf%lf",
+                                   type, btype, &atomnr, &m, &q, ptype, &c[0], &c[1]);
                     if (nread < 8)
                     {
                         too_few(wi);
@@ -337,9 +330,8 @@ void push_at (t_symtab *symtab, gpp_atomtype_t at, t_bond_atomtype bat,
                 else
                 {
                     /* have_atomic_number && !have_bonded_type */
-                    nread = sscanf(line, "%s%d%lf%lf%s%lf%lf%lf%lf%lf%lf",
-                                   type, &atomnr, &m, &q, ptype, &c[0], &c[1],
-                                   &radius, &vol, &surftens, &gb_radius);
+                    nread = sscanf(line, "%s%d%lf%lf%s%lf%lf",
+                                   type, &atomnr, &m, &q, ptype, &c[0], &c[1]);
                     if (nread < 7)
                     {
                         too_few(wi);
@@ -352,9 +344,8 @@ void push_at (t_symtab *symtab, gpp_atomtype_t at, t_bond_atomtype bat,
                 if (have_bonded_type)
                 {
                     /* !have_atomic_number && have_bonded_type */
-                    nread = sscanf(line, "%s%s%lf%lf%s%lf%lf%lf%lf%lf%lf",
-                                   type, btype, &m, &q, ptype, &c[0], &c[1],
-                                   &radius, &vol, &surftens, &gb_radius);
+                    nread = sscanf(line, "%s%s%lf%lf%s%lf%lf",
+                                   type, btype, &m, &q, ptype, &c[0], &c[1]);
                     if (nread < 7)
                     {
                         too_few(wi);
@@ -364,9 +355,8 @@ void push_at (t_symtab *symtab, gpp_atomtype_t at, t_bond_atomtype bat,
                 else
                 {
                     /* !have_atomic_number && !have_bonded_type */
-                    nread = sscanf(line, "%s%lf%lf%s%lf%lf%lf%lf%lf%lf",
-                                   type, &m, &q, ptype, &c[0], &c[1],
-                                   &radius, &vol, &surftens, &gb_radius);
+                    nread = sscanf(line, "%s%lf%lf%s%lf%lf",
+                                   type, &m, &q, ptype, &c[0], &c[1]);
                     if (nread < 6)
                     {
                         too_few(wi);
@@ -394,9 +384,8 @@ void push_at (t_symtab *symtab, gpp_atomtype_t at, t_bond_atomtype bat,
             {
                 if (have_bonded_type)
                 {
-                    nread = sscanf(line, "%s%s%d%lf%lf%s%lf%lf%lf%lf%lf%lf%lf",
-                                   type, btype, &atomnr, &m, &q, ptype, &c[0], &c[1], &c[2],
-                                   &radius, &vol, &surftens, &gb_radius);
+                    nread = sscanf(line, "%s%s%d%lf%lf%s%lf%lf%lf",
+                                   type, btype, &atomnr, &m, &q, ptype, &c[0], &c[1], &c[2]);
                     if (nread < 9)
                     {
                         too_few(wi);
@@ -406,9 +395,8 @@ void push_at (t_symtab *symtab, gpp_atomtype_t at, t_bond_atomtype bat,
                 else
                 {
                     /* have_atomic_number && !have_bonded_type */
-                    nread = sscanf(line, "%s%d%lf%lf%s%lf%lf%lf%lf%lf%lf%lf",
-                                   type, &atomnr, &m, &q, ptype, &c[0], &c[1], &c[2],
-                                   &radius, &vol, &surftens, &gb_radius);
+                    nread = sscanf(line, "%s%d%lf%lf%s%lf%lf%lf",
+                                   type, &atomnr, &m, &q, ptype, &c[0], &c[1], &c[2]);
                     if (nread < 8)
                     {
                         too_few(wi);
@@ -421,9 +409,8 @@ void push_at (t_symtab *symtab, gpp_atomtype_t at, t_bond_atomtype bat,
                 if (have_bonded_type)
                 {
                     /* !have_atomic_number && have_bonded_type */
-                    nread = sscanf(line, "%s%s%lf%lf%s%lf%lf%lf%lf%lf%lf%lf",
-                                   type, btype, &m, &q, ptype, &c[0], &c[1], &c[2],
-                                   &radius, &vol, &surftens, &gb_radius);
+                    nread = sscanf(line, "%s%s%lf%lf%s%lf%lf%lf",
+                                   type, btype, &m, &q, ptype, &c[0], &c[1], &c[2]);
                     if (nread < 8)
                     {
                         too_few(wi);
@@ -433,9 +420,8 @@ void push_at (t_symtab *symtab, gpp_atomtype_t at, t_bond_atomtype bat,
                 else
                 {
                     /* !have_atomic_number && !have_bonded_type */
-                    nread = sscanf(line, "%s%lf%lf%s%lf%lf%lf%lf%lf%lf%lf",
-                                   type, &m, &q, ptype, &c[0], &c[1], &c[2],
-                                   &radius, &vol, &surftens, &gb_radius);
+                    nread = sscanf(line, "%s%lf%lf%s%lf%lf%lf",
+                                   type, &m, &q, ptype, &c[0], &c[1], &c[2]);
                     if (nread < 7)
                     {
                         too_few(wi);
@@ -517,15 +503,14 @@ void push_at (t_symtab *symtab, gpp_atomtype_t at, t_bond_atomtype bat,
         sprintf(errbuf, "Overriding atomtype %s", type);
         warning(wi, errbuf);
         if ((nr = set_atomtype(nr, at, symtab, atom, type, param, batype_nr,
-                               radius, vol, surftens, atomnr, gb_radius, S_hct)) == NOTSET)
+                               atomnr)) == NOTSET)
         {
             sprintf(errbuf, "Replacing atomtype %s failed", type);
             warning_error_and_exit(wi, errbuf, FARGS);
         }
     }
     else if ((add_atomtype(at, symtab, atom, type, param,
-                           batype_nr, radius, vol,
-                           surftens, atomnr, gb_radius, S_hct)) == NOTSET)
+                           batype_nr, atomnr)) == NOTSET)
     {
         sprintf(errbuf, "Adding atomtype %s failed", type);
         warning_error_and_exit(wi, errbuf, FARGS);
@@ -1082,33 +1067,6 @@ void push_nbt(directive d, t_nbparam **nbt, gpp_atomtype_t atype,
     }
 }
 
-void
-push_gb_params (gpp_atomtype_t at, char *line,
-                warninp_t wi)
-{
-    int    atype;
-    double radius, vol, surftens, gb_radius, S_hct;
-    char   atypename[STRLEN];
-    char   errbuf[STRLEN];
-
-    if ( (sscanf(line, "%s%lf%lf%lf%lf%lf", atypename, &radius, &vol, &surftens, &gb_radius, &S_hct)) != 6)
-    {
-        sprintf(errbuf, "Too few gb parameters for type %s\n", atypename);
-        warning(wi, errbuf);
-    }
-
-    /* Search for atomtype */
-    atype = get_atomtype_type(atypename, at);
-
-    if (atype == NOTSET)
-    {
-        printf("Couldn't find topology match for atomtype %s\n", atypename);
-        abort();
-    }
-
-    set_atomtype_gbparam(at, atype, radius, vol, surftens, gb_radius, S_hct);
-}
-
 void
 push_cmaptype(directive d, t_params bt[], int nral, gpp_atomtype_t at,
               t_bond_atomtype bat, char *line,
@@ -2633,7 +2591,7 @@ int add_atomtype_decoupled(t_symtab *symtab, gpp_atomtype_t at,
         param.c[i] = 0.0;
     }
 
-    nr = add_atomtype(at, symtab, &atom, "decoupled", &param, -1, 0.0, 0.0, 0.0, 0, 0, 0);
+    nr = add_atomtype(at, symtab, &atom, "decoupled", &param, -1, 0);
 
     /* Add space in the non-bonded parameters matrix */
     realloc_nb_params(at, nbparam, pair);
diff --git a/src/gromacs/gmxpreprocess/toppush.h b/src/gromacs/gmxpreprocess/toppush.h
index 24619f2398..3c6e2bc57f 100644
--- a/src/gromacs/gmxpreprocess/toppush.h
+++ b/src/gromacs/gmxpreprocess/toppush.h
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -76,11 +76,6 @@ void push_nbt(directive d, t_nbparam **nbt, gpp_atomtype_t atype,
               char *plines, int nb_funct,
               warninp_t wi);
 
-void
-push_gb_params(gpp_atomtype_t atype,
-               char          *line,
-               warninp_t      wi);
-
 void push_atom(struct t_symtab *symtab,
                t_block         *cgs,
                t_atoms         *at,
diff --git a/src/gromacs/gmxpreprocess/toputil.cpp b/src/gromacs/gmxpreprocess/toputil.cpp
index 6e84f4a3af..01766cfca8 100644
--- a/src/gromacs/gmxpreprocess/toputil.cpp
+++ b/src/gromacs/gmxpreprocess/toputil.cpp
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2012,2014,2015,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2014,2015,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -532,7 +532,7 @@ void print_bondeds(FILE *out, int natoms, directive d,
     {
         char buf[12];
         sprintf(buf, "%4d", (i+1));
-        add_atomtype(atype, &stab, a, buf, param, 0, 0, 0, 0, 0, 0, 0);
+        add_atomtype(atype, &stab, a, buf, param, 0, 0);
     }
     print_bt(out, d, atype, ftype, fsubtype, plist, TRUE);
 
diff --git a/src/gromacs/listed-forces/listed-forces.cpp b/src/gromacs/listed-forces/listed-forces.cpp
index 0de7f7b640..f7aedaa034 100644
--- a/src/gromacs/listed-forces/listed-forces.cpp
+++ b/src/gromacs/listed-forces/listed-forces.cpp
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -405,8 +405,7 @@ ftype_is_bonded_potential(int ftype)
 {
     return
         (interaction_function[ftype].flags & IF_BOND) &&
-        !(ftype == F_CONNBONDS || ftype == F_POSRES || ftype == F_FBPOSRES) &&
-        (ftype < F_GB12 || ftype > F_GB14);
+        !(ftype == F_CONNBONDS || ftype == F_POSRES || ftype == F_FBPOSRES);
 }
 
 /*! \brief Compute the bonded part of the listed forces, parallelized over threads
diff --git a/src/gromacs/mdlib/broadcaststructs.cpp b/src/gromacs/mdlib/broadcaststructs.cpp
index 4977cf3acf..6a570aacc5 100644
--- a/src/gromacs/mdlib/broadcaststructs.cpp
+++ b/src/gromacs/mdlib/broadcaststructs.cpp
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -769,23 +769,7 @@ static void bc_molblock(const t_commrec *cr, gmx_molblock_t *molb)
 
 static void bc_atomtypes(const t_commrec *cr, t_atomtypes *atomtypes)
 {
-    int nr;
-
     block_bc(cr, atomtypes->nr);
-
-    nr = atomtypes->nr;
-
-    snew_bc(cr, atomtypes->radius, nr);
-    snew_bc(cr, atomtypes->vol, nr);
-    snew_bc(cr, atomtypes->surftens, nr);
-    snew_bc(cr, atomtypes->gb_radius, nr);
-    snew_bc(cr, atomtypes->S_hct, nr);
-
-    nblock_bc(cr, nr, atomtypes->radius);
-    nblock_bc(cr, nr, atomtypes->vol);
-    nblock_bc(cr, nr, atomtypes->surftens);
-    nblock_bc(cr, nr, atomtypes->gb_radius);
-    nblock_bc(cr, nr, atomtypes->S_hct);
 }
 
 /*! \brief Broadcasts ir and mtop from the master to all nodes in
diff --git a/src/gromacs/mdlib/force.cpp b/src/gromacs/mdlib/force.cpp
index f01ea4d333..4f21ca3cca 100644
--- a/src/gromacs/mdlib/force.cpp
+++ b/src/gromacs/mdlib/force.cpp
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -57,7 +57,6 @@
 #include "gromacs/math/vec.h"
 #include "gromacs/math/vecdump.h"
 #include "gromacs/mdlib/forcerec-threading.h"
-#include "gromacs/mdlib/genborn.h"
 #include "gromacs/mdlib/mdrun.h"
 #include "gromacs/mdlib/ns.h"
 #include "gromacs/mdlib/qmmm.h"
@@ -144,9 +143,6 @@ void do_force_lowlevel(t_forcerec *fr,      t_inputrec *ir,
                        gmx::ForceWithVirial *forceWithVirial,
                        gmx_enerdata_t *enerd,
                        t_fcdata   *fcd,
-                       gmx_localtop_t *top,
-                       gmx_genborn_t *born,
-                       gmx_bool       bBornRadii,
                        matrix     box,
                        t_lambda   *fepvals,
                        real       *lambda,
@@ -201,24 +197,6 @@ void do_force_lowlevel(t_forcerec *fr,      t_inputrec *ir,
         enerd->dvdl_lin[efptVDW] += dvdl_walls;
     }
 
-    /* If doing GB, reset dvda and calculate the Born radii */
-    if (ir->implicit_solvent)
-    {
-        wallcycle_sub_start(wcycle, ewcsNONBONDED);
-
-        for (i = 0; i < born->nr; i++)
-        {
-            fr->dvda[i] = 0;
-        }
-
-        if (bBornRadii)
-        {
-            calc_gb_rad(cr, fr, ir, top, x, fr->gblist, born, md, nrnb);
-        }
-
-        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
-    }
-
     where();
     /* We only do non-bonded calculation with group scheme here, the verlet
      * calls are done from do_force_cutsVERLET(). */
@@ -273,17 +251,6 @@ void do_force_lowlevel(t_forcerec *fr,      t_inputrec *ir,
         where();
     }
 
-    /* If we are doing GB, calculate bonded forces and apply corrections
-     * to the solvation forces */
-    /* MRS: Eventually, many need to include free energy contribution here! */
-    if (ir->implicit_solvent)
-    {
-        wallcycle_sub_start(wcycle, ewcsLISTED);
-        calc_gb_forces(cr, md, born, top, x, forceForUseWithShiftForces, fr, idef,
-                       ir->gb_algorithm, ir->sa_algorithm, nrnb, &pbc, graph, enerd);
-        wallcycle_sub_stop(wcycle, ewcsLISTED);
-    }
-
 #if GMX_MPI
     if (TAKETIME)
     {
@@ -703,8 +670,6 @@ void sum_epot(gmx_grppairener_t *grpp, real *epot)
     epot[F_LJ]       = sum_v(grpp->nener, grpp->ener[egLJSR]);
     epot[F_LJ14]     = sum_v(grpp->nener, grpp->ener[egLJ14]);
     epot[F_COUL14]   = sum_v(grpp->nener, grpp->ener[egCOUL14]);
-    /* We have already added 1-2,1-3, and 1-4 terms to F_GBPOL */
-    epot[F_GBPOL]   += sum_v(grpp->nener, grpp->ener[egGB]);
 
 /* lattice part of LR doesnt belong to any group
  * and has been added earlier
diff --git a/src/gromacs/mdlib/force.h b/src/gromacs/mdlib/force.h
index 2d83e17a25..6bfc66051c 100644
--- a/src/gromacs/mdlib/force.h
+++ b/src/gromacs/mdlib/force.h
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -164,7 +164,6 @@ void do_force(FILE *log, t_commrec *cr,
               t_forcerec *fr,
               gmx_vsite_t *vsite, rvec mu_tot,
               double t, struct gmx_edsam *ed,
-              gmx_bool bBornRadii,
               int flags,
               DdOpenBalanceRegionBeforeForceComputation ddOpenBalanceRegion,
               DdCloseBalanceRegionAfterForceComputation ddCloseBalanceRegion);
@@ -202,9 +201,6 @@ void do_force_lowlevel(t_forcerec   *fr,
                        gmx::ForceWithVirial *forceWithVirial,
                        gmx_enerdata_t *enerd,
                        t_fcdata     *fcd,
-                       gmx_localtop_t *top,
-                       gmx_genborn_t *born,
-                       gmx_bool         bBornRadii,
                        matrix       box,
                        t_lambda     *fepvals,
                        real         *lambda,
diff --git a/src/gromacs/mdlib/forcerec.cpp b/src/gromacs/mdlib/forcerec.cpp
index 41f87dedc8..3b77599e06 100644
--- a/src/gromacs/mdlib/forcerec.cpp
+++ b/src/gromacs/mdlib/forcerec.cpp
@@ -1539,10 +1539,6 @@ gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *
             ir->vdwtype == evdwCUT    &&
             ir->coulombtype == eelCUT &&
             ir->efep == efepNO        &&
-            (ir->implicit_solvent == eisNO ||
-             (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
-                                                  ir->gb_algorithm == egbHCT   ||
-                                                  ir->gb_algorithm == egbOBC))) &&
             getenv("GMX_NO_ALLVSALL") == nullptr
         );
 
@@ -2346,7 +2342,7 @@ void init_forcerec(FILE                    *fp,
                    gmx_bool                 bNoSolvOpt,
                    real                     print_force)
 {
-    int            i, m, negp_pp, negptable, egi, egj;
+    int            m, negp_pp, negptable, egi, egj;
     real           rtab;
     char          *env;
     double         dbl;
@@ -2491,7 +2487,6 @@ void init_forcerec(FILE                    *fp,
     /* Check if we can/should do all-vs-all kernels */
     fr->bAllvsAll       = can_use_allvsall(ir, FALSE, nullptr, nullptr);
     fr->AllvsAll_work   = nullptr;
-    fr->AllvsAll_workgb = nullptr;
 
     /* All-vs-all kernels have not been implemented in 4.6 and later.
      * See Redmine #1249. */
@@ -2592,7 +2587,6 @@ void init_forcerec(FILE                    *fp,
             fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
         }
     }
-    fr->bGB = (ir->implicit_solvent == eisGBSA);
 
     fr->rc_scaling = ir->refcoord_scaling;
     copy_rvec(ir->posres_com, fr->posres_com);
@@ -2619,7 +2613,7 @@ void init_forcerec(FILE                    *fp,
     switch (ic->eeltype)
     {
         case eelCUT:
-            fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
+            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_COULOMB;
             break;
 
         case eelRF:
@@ -2875,63 +2869,9 @@ void init_forcerec(FILE                    *fp,
         set_avcsixtwelve(fp, fr, mtop);
     }
 
-    fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
-
-    /* Copy the GBSA data (radius, volume and surftens for each
-     * atomtype) from the topology atomtype section to forcerec.
-     */
-    snew(fr->atype_radius, fr->ntype);
-    snew(fr->atype_vol, fr->ntype);
-    snew(fr->atype_surftens, fr->ntype);
-    snew(fr->atype_gb_radius, fr->ntype);
-    snew(fr->atype_S_hct, fr->ntype);
-
-    if (mtop->atomtypes.nr > 0)
-    {
-        for (i = 0; i < fr->ntype; i++)
-        {
-            fr->atype_radius[i] = mtop->atomtypes.radius[i];
-        }
-        for (i = 0; i < fr->ntype; i++)
-        {
-            fr->atype_vol[i] = mtop->atomtypes.vol[i];
-        }
-        for (i = 0; i < fr->ntype; i++)
-        {
-            fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
-        }
-        for (i = 0; i < fr->ntype; i++)
-        {
-            fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
-        }
-        for (i = 0; i < fr->ntype; i++)
-        {
-            fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
-        }
-    }
-
-    /* Generate the GB table if needed */
-    if (fr->bGB)
+    if (ir->implicit_solvent)
     {
-        GMX_LOG(mdlog.info).asParagraph().
-            appendText("The support for implicit solvent is deprecated, and may be removed "
-                       "in a future version.");
-#if GMX_DOUBLE
-        fr->gbtabscale = 2000;
-#else
-        fr->gbtabscale = 500;
-#endif
-
-        fr->gbtabr = 100;
-        fr->gbtab  = make_gb_table(fr);
-
-        init_gb(&fr->born, fr, ir, mtop, ir->gb_algorithm);
-
-        /* Copy local gb data (for dd, this is done in dd_partition_system) */
-        if (!DOMAINDECOMP(cr))
-        {
-            make_local_gb(cr, fr->born, ir->gb_algorithm);
-        }
+        gmx_fatal(FARGS, "Implicit solvation is no longer supported.");
     }
 
     /* Construct tables for the group scheme. A little unnecessary to
diff --git a/src/gromacs/mdlib/forcerec.h b/src/gromacs/mdlib/forcerec.h
index 85a4383353..af7e5d77e8 100644
--- a/src/gromacs/mdlib/forcerec.h
+++ b/src/gromacs/mdlib/forcerec.h
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -38,7 +38,6 @@
 #define GMX_MDLIB_FORCEREC_H
 
 #include "gromacs/mdlib/force_flags.h"
-#include "gromacs/mdlib/genborn.h"
 #include "gromacs/mdlib/tgroup.h"
 #include "gromacs/mdlib/vsite.h"
 #include "gromacs/mdtypes/forcerec.h"
@@ -49,6 +48,7 @@ struct gmx_hw_info_t;
 struct t_commrec;
 struct t_fcdata;
 struct t_filenm;
+struct t_inputrec;
 
 namespace gmx
 {
diff --git a/src/gromacs/mdlib/genborn.cpp b/src/gromacs/mdlib/genborn.cpp
deleted file mode 100644
index 7d9cc0f617..0000000000
--- a/src/gromacs/mdlib/genborn.cpp
+++ /dev/null
@@ -1,1713 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2008, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-
-#include "gmxpre.h"
-
-#include "genborn.h"
-
-#include <string.h>
-
-#include <cmath>
-
-#include <algorithm>
-
-#include "gromacs/domdec/domdec.h"
-#include "gromacs/domdec/domdec_struct.h"
-#include "gromacs/fileio/pdbio.h"
-#include "gromacs/gmxlib/network.h"
-#include "gromacs/gmxlib/nrnb.h"
-#include "gromacs/math/functions.h"
-#include "gromacs/math/units.h"
-#include "gromacs/math/vec.h"
-#include "gromacs/mdlib/genborn_allvsall.h"
-#include "gromacs/mdtypes/commrec.h"
-#include "gromacs/mdtypes/inputrec.h"
-#include "gromacs/mdtypes/md_enums.h"
-#include "gromacs/mdtypes/nblist.h"
-#include "gromacs/pbcutil/ishift.h"
-#include "gromacs/pbcutil/mshift.h"
-#include "gromacs/pbcutil/pbc.h"
-#include "gromacs/topology/mtop_util.h"
-#include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/gmxmpi.h"
-#include "gromacs/utility/smalloc.h"
-
-
-typedef struct {
-    int  shift;
-    int  naj;
-    int *aj;
-    int  aj_nalloc;
-} gbtmpnbl_t;
-
-typedef struct gbtmpnbls {
-    int         nlist;
-    gbtmpnbl_t *list;
-    int         list_nalloc;
-} t_gbtmpnbls;
-
-/* This function is exactly the same as the one in listed-forces/bonded.cpp. The reason
- * it is copied here is that the bonded gb-interactions are evaluated
- * not in calc_bonds, but rather in calc_gb_forces
- */
-static int pbc_rvec_sub(const t_pbc *pbc, const rvec xi, const rvec xj, rvec dx)
-{
-    if (pbc)
-    {
-        return pbc_dx_aiuc(pbc, xi, xj, dx);
-    }
-    else
-    {
-        rvec_sub(xi, xj, dx);
-        return CENTRAL;
-    }
-}
-
-static int init_gb_nblist(int natoms, t_nblist *nl)
-{
-    nl->maxnri      = natoms*4;
-    nl->maxnrj      = 0;
-    nl->nri         = 0;
-    nl->nrj         = 0;
-    nl->iinr        = nullptr;
-    nl->gid         = nullptr;
-    nl->shift       = nullptr;
-    nl->jindex      = nullptr;
-    nl->jjnr        = nullptr;
-    /*nl->nltype      = nltype;*/
-
-    srenew(nl->iinr,   nl->maxnri);
-    srenew(nl->gid,    nl->maxnri);
-    srenew(nl->shift,  nl->maxnri);
-    srenew(nl->jindex, nl->maxnri+1);
-
-    nl->jindex[0] = 0;
-
-    return 0;
-}
-
-
-static int init_gb_still(const t_atomtypes *atype, t_idef *idef, t_atoms *atoms,
-                         gmx_genborn_t *born, int natoms)
-{
-
-    int   i, j, m, ia, ib;
-    real  r, ri, rj, ri2, rj2, r3, r4, ratio, term, h, doffset;
-
-    real *vsol;
-    real *gp;
-
-    snew(vsol, natoms);
-    snew(gp, natoms);
-    snew(born->gpol_still_work, natoms+3);
-
-    doffset = born->gb_doffset;
-
-    for (i = 0; i < natoms; i++)
-    {
-        born->gpol_globalindex[i]              = born->vsolv_globalindex[i] =
-                born->gb_radius_globalindex[i] = 0;
-    }
-
-    /* Compute atomic solvation volumes for Still method */
-    for (i = 0; i < natoms; i++)
-    {
-        ri = atype->gb_radius[atoms->atom[i].type];
-        born->gb_radius_globalindex[i] = ri;
-        r3 = ri*ri*ri;
-        born->vsolv_globalindex[i] = (4*M_PI/3)*r3;
-    }
-
-    for (j = 0; j < idef->il[F_GB12].nr; j += 3)
-    {
-        m  = idef->il[F_GB12].iatoms[j];
-        ia = idef->il[F_GB12].iatoms[j+1];
-        ib = idef->il[F_GB12].iatoms[j+2];
-
-        r = 1.01*idef->iparams[m].gb.st;
-
-        ri   = atype->gb_radius[atoms->atom[ia].type];
-        rj   = atype->gb_radius[atoms->atom[ib].type];
-
-        ri2  = ri*ri;
-        rj2  = rj*rj;
-
-        ratio  = (rj2-ri2-r*r)/(2*ri*r);
-        h      = ri*(1+ratio);
-        term   = (M_PI/3.0)*h*h*(3.0*ri-h);
-
-        born->vsolv_globalindex[ia] -= term;
-
-        ratio  = (ri2-rj2-r*r)/(2*rj*r);
-        h      = rj*(1+ratio);
-        term   = (M_PI/3.0)*h*h*(3.0*rj-h);
-
-        born->vsolv_globalindex[ib] -= term;
-    }
-
-    /* Get the self-, 1-2 and 1-3 polarization energies for analytical Still
-       method */
-    /* Self */
-    for (j = 0; j < natoms; j++)
-    {
-        if (born->use_globalindex[j] == 1)
-        {
-            born->gpol_globalindex[j] = -0.5*ONE_4PI_EPS0/
-                (atype->gb_radius[atoms->atom[j].type]-doffset+STILL_P1);
-        }
-    }
-
-    /* 1-2 */
-    for (j = 0; j < idef->il[F_GB12].nr; j += 3)
-    {
-        m  = idef->il[F_GB12].iatoms[j];
-        ia = idef->il[F_GB12].iatoms[j+1];
-        ib = idef->il[F_GB12].iatoms[j+2];
-
-        r = idef->iparams[m].gb.st;
-
-        r4 = r*r*r*r;
-
-        born->gpol_globalindex[ia] = born->gpol_globalindex[ia]+
-            STILL_P2*born->vsolv_globalindex[ib]/r4;
-        born->gpol_globalindex[ib] = born->gpol_globalindex[ib]+
-            STILL_P2*born->vsolv_globalindex[ia]/r4;
-    }
-
-    /* 1-3 */
-    for (j = 0; j < idef->il[F_GB13].nr; j += 3)
-    {
-        m  = idef->il[F_GB13].iatoms[j];
-        ia = idef->il[F_GB13].iatoms[j+1];
-        ib = idef->il[F_GB13].iatoms[j+2];
-
-        r  = idef->iparams[m].gb.st;
-        r4 = r*r*r*r;
-
-        born->gpol_globalindex[ia] = born->gpol_globalindex[ia]+
-            STILL_P3*born->vsolv_globalindex[ib]/r4;
-        born->gpol_globalindex[ib] = born->gpol_globalindex[ib]+
-            STILL_P3*born->vsolv_globalindex[ia]/r4;
-    }
-
-    sfree(vsol);
-    sfree(gp);
-
-    return 0;
-}
-
-/* Initialize all GB datastructs and compute polarization energies */
-int init_gb(gmx_genborn_t **p_born,
-            t_forcerec *fr, const t_inputrec *ir,
-            const gmx_mtop_t *mtop, int gb_algorithm)
-{
-    int             i, jj, natoms;
-    real            rai, sk, doffset;
-
-    t_atoms         atoms;
-    gmx_genborn_t  *born;
-    gmx_localtop_t *localtop;
-
-    natoms   = mtop->natoms;
-
-    atoms    = gmx_mtop_global_atoms(mtop);
-    localtop = gmx_mtop_generate_local_top(mtop, ir->efep != efepNO);
-
-    snew(born, 1);
-    *p_born = born;
-
-    born->nr  = natoms;
-
-    snew(born->drobc, natoms);
-    snew(born->bRad,  natoms);
-
-    /* Allocate memory for the global data arrays */
-    snew(born->param_globalindex, natoms+3);
-    snew(born->gpol_globalindex,  natoms+3);
-    snew(born->vsolv_globalindex, natoms+3);
-    snew(born->gb_radius_globalindex, natoms+3);
-    snew(born->use_globalindex,    natoms+3);
-
-    snew(fr->invsqrta, natoms);
-    snew(fr->dvda,     natoms);
-
-    fr->dadx              = nullptr;
-    fr->dadx_rawptr       = nullptr;
-    fr->nalloc_dadx       = 0;
-    born->gpol_still_work = nullptr;
-    born->gpol_hct_work   = nullptr;
-
-    /* snew(born->asurf,natoms); */
-    /* snew(born->dasurf,natoms); */
-
-    /* Initialize the gb neighbourlist */
-    snew(fr->gblist, 1);
-    init_gb_nblist(natoms, fr->gblist);
-
-    /* Do the Vsites exclusions (if any) */
-    for (i = 0; i < natoms; i++)
-    {
-        jj = atoms.atom[i].type;
-        if (mtop->atomtypes.gb_radius[atoms.atom[i].type] > 0)
-        {
-            born->use_globalindex[i] = 1;
-        }
-        else
-        {
-            born->use_globalindex[i] = 0;
-        }
-
-        /* If we have a Vsite, put vs_globalindex[i]=0 */
-        if (C6 (fr->nbfp, fr->ntype, jj, jj) == 0 &&
-            C12(fr->nbfp, fr->ntype, jj, jj) == 0 &&
-            atoms.atom[i].q == 0)
-        {
-            born->use_globalindex[i] = 0;
-        }
-    }
-
-    /* Copy algorithm parameters from inputrecord to local structure */
-    born->obc_alpha          = ir->gb_obc_alpha;
-    born->obc_beta           = ir->gb_obc_beta;
-    born->obc_gamma          = ir->gb_obc_gamma;
-    born->gb_doffset         = ir->gb_dielectric_offset;
-    born->gb_epsilon_solvent = ir->gb_epsilon_solvent;
-    born->epsilon_r          = ir->epsilon_r;
-
-    doffset = born->gb_doffset;
-
-    /* Set the surface tension */
-    born->sa_surface_tension = ir->sa_surface_tension;
-
-    /* If Still model, initialise the polarisation energies */
-    if (gb_algorithm == egbSTILL)
-    {
-        init_gb_still(&(mtop->atomtypes), &(localtop->idef), &atoms,
-                      born, natoms);
-    }
-
-
-    /* If HCT/OBC,  precalculate the sk*atype->S_hct factors */
-    else if (gb_algorithm == egbHCT || gb_algorithm == egbOBC)
-    {
-
-        snew(born->gpol_hct_work, natoms+3);
-
-        for (i = 0; i < natoms; i++)
-        {
-            if (born->use_globalindex[i] == 1)
-            {
-                rai = mtop->atomtypes.gb_radius[atoms.atom[i].type]-doffset;
-                sk  = rai * mtop->atomtypes.S_hct[atoms.atom[i].type];
-                born->param_globalindex[i]     = sk;
-                born->gb_radius_globalindex[i] = rai;
-            }
-            else
-            {
-                born->param_globalindex[i]     = 0;
-                born->gb_radius_globalindex[i] = 0;
-            }
-        }
-    }
-
-    /* Allocate memory for work arrays for temporary use */
-    snew(born->work, natoms+4);
-    snew(born->count, natoms);
-    snew(born->nblist_work, natoms);
-
-    /* Domain decomposition specific stuff */
-    born->nalloc = 0;
-
-    return 0;
-}
-
-
-
-static int
-calc_gb_rad_still(t_commrec *cr, t_forcerec *fr, gmx_localtop_t *top,
-                  rvec x[], t_nblist *nl,
-                  gmx_genborn_t *born, t_mdatoms *md)
-{
-    int  i, k, n, nj0, nj1, ai, aj;
-    int  shift;
-    real shX, shY, shZ;
-    real gpi, dr2, idr4, rvdw, ratio, ccf, theta, term, rai, raj;
-    real ix1, iy1, iz1, jx1, jy1, jz1, dx11, dy11, dz11;
-    real rinv, idr2, idr6, vaj, dccf, cosq, sinq, prod, gpi2;
-    real factor;
-    real vai, prod_ai, icf4, icf6;
-
-    factor  = 0.5*ONE_4PI_EPS0;
-    n       = 0;
-
-    for (i = 0; i < born->nr; i++)
-    {
-        born->gpol_still_work[i] = 0;
-    }
-
-    for (i = 0; i < nl->nri; i++)
-    {
-        ai      = nl->iinr[i];
-
-        nj0     = nl->jindex[i];
-        nj1     = nl->jindex[i+1];
-
-        /* Load shifts for this list */
-        shift   = nl->shift[i];
-        shX     = fr->shift_vec[shift][0];
-        shY     = fr->shift_vec[shift][1];
-        shZ     = fr->shift_vec[shift][2];
-
-        gpi     = 0;
-
-        rai     = top->atomtypes.gb_radius[md->typeA[ai]];
-        vai     = born->vsolv[ai];
-        prod_ai = STILL_P4*vai;
-
-        /* Load atom i coordinates, add shift vectors */
-        ix1     = shX + x[ai][0];
-        iy1     = shY + x[ai][1];
-        iz1     = shZ + x[ai][2];
-
-        for (k = nj0; k < nj1 && nl->jjnr[k] >= 0; k++)
-        {
-            aj    = nl->jjnr[k];
-            jx1   = x[aj][0];
-            jy1   = x[aj][1];
-            jz1   = x[aj][2];
-
-            dx11  = ix1-jx1;
-            dy11  = iy1-jy1;
-            dz11  = iz1-jz1;
-
-            dr2   = dx11*dx11+dy11*dy11+dz11*dz11;
-            rinv  = gmx::invsqrt(dr2);
-            idr2  = rinv*rinv;
-            idr4  = idr2*idr2;
-            idr6  = idr4*idr2;
-
-            raj = top->atomtypes.gb_radius[md->typeA[aj]];
-
-            rvdw  = rai + raj;
-
-            ratio = dr2 / (rvdw * rvdw);
-            vaj   = born->vsolv[aj];
-
-            if (ratio > STILL_P5INV)
-            {
-                ccf  = 1.0;
-                dccf = 0.0;
-            }
-            else
-            {
-                theta = ratio*STILL_PIP5;
-                cosq  = cos(theta);
-                term  = 0.5*(1.0-cosq);
-                ccf   = term*term;
-                sinq  = 1.0 - cosq*cosq;
-                dccf  = 2.0*term*sinq*gmx::invsqrt(sinq)*theta;
-            }
-
-            prod                       = STILL_P4*vaj;
-            icf4                       = ccf*idr4;
-            icf6                       = (4*ccf-dccf)*idr6;
-            born->gpol_still_work[aj] += prod_ai*icf4;
-            gpi                        = gpi+prod*icf4;
-
-            /* Save ai->aj and aj->ai chain rule terms */
-            fr->dadx[n++]   = prod*icf6;
-            fr->dadx[n++]   = prod_ai*icf6;
-        }
-        born->gpol_still_work[ai] += gpi;
-    }
-
-    /* Parallel summations */
-    if (DOMAINDECOMP(cr))
-    {
-        dd_atom_sum_real(cr->dd, born->gpol_still_work);
-    }
-
-    /* Calculate the radii */
-    for (i = 0; i < fr->natoms_force; i++) /* PELA born->nr */
-    {
-        if (born->use[i] != 0)
-        {
-            gpi             = born->gpol[i]+born->gpol_still_work[i];
-            gpi2            = gpi * gpi;
-            born->bRad[i]   = factor*gmx::invsqrt(gpi2);
-            fr->invsqrta[i] = gmx::invsqrt(born->bRad[i]);
-        }
-    }
-
-    /* Extra communication required for DD */
-    if (DOMAINDECOMP(cr))
-    {
-        dd_atom_spread_real(cr->dd, born->bRad);
-        dd_atom_spread_real(cr->dd, fr->invsqrta);
-    }
-
-    return 0;
-
-}
-
-
-static int
-calc_gb_rad_hct(t_commrec *cr, t_forcerec *fr, gmx_localtop_t *top,
-                rvec x[], t_nblist *nl,
-                gmx_genborn_t *born, t_mdatoms *md)
-{
-    int   i, k, n, ai, aj, nj0, nj1;
-    int   shift;
-    real  shX, shY, shZ;
-    real  rai, raj, dr2, dr, sk, sk_ai, sk2, sk2_ai, lij, uij, diff2, tmp, sum_ai;
-    real  rad, min_rad, rinv, rai_inv;
-    real  ix1, iy1, iz1, jx1, jy1, jz1, dx11, dy11, dz11;
-    real  lij2, uij2, lij3, uij3, t1, t2, t3;
-    real  lij_inv, dlij, sk2_rinv, prod, log_term;
-    real  doffset, raj_inv, dadx_val;
-    real *gb_radius;
-
-    doffset   = born->gb_doffset;
-    gb_radius = born->gb_radius;
-
-    for (i = 0; i < born->nr; i++)
-    {
-        born->gpol_hct_work[i] = 0;
-    }
-
-    /* Keep the compiler happy */
-    n    = 0;
-
-    for (i = 0; i < nl->nri; i++)
-    {
-        ai     = nl->iinr[i];
-
-        nj0    = nl->jindex[i];
-        nj1    = nl->jindex[i+1];
-
-        /* Load shifts for this list */
-        shift   = nl->shift[i];
-        shX     = fr->shift_vec[shift][0];
-        shY     = fr->shift_vec[shift][1];
-        shZ     = fr->shift_vec[shift][2];
-
-        rai     = gb_radius[ai];
-        rai_inv = 1.0/rai;
-
-        sk_ai   = born->param[ai];
-        sk2_ai  = sk_ai*sk_ai;
-
-        /* Load atom i coordinates, add shift vectors */
-        ix1     = shX + x[ai][0];
-        iy1     = shY + x[ai][1];
-        iz1     = shZ + x[ai][2];
-
-        sum_ai  = 0;
-
-        for (k = nj0; k < nj1 && nl->jjnr[k] >= 0; k++)
-        {
-            aj    = nl->jjnr[k];
-
-            jx1   = x[aj][0];
-            jy1   = x[aj][1];
-            jz1   = x[aj][2];
-
-            dx11  = ix1 - jx1;
-            dy11  = iy1 - jy1;
-            dz11  = iz1 - jz1;
-
-            dr2   = dx11*dx11+dy11*dy11+dz11*dz11;
-            rinv  = gmx::invsqrt(dr2);
-            dr    = rinv*dr2;
-
-            sk    = born->param[aj];
-            raj   = gb_radius[aj];
-
-            /* aj -> ai interaction */
-            if (rai < dr+sk)
-            {
-                lij     = 1.0/(dr-sk);
-                dlij    = 1.0;
-
-                if (rai > dr-sk)
-                {
-                    lij  = rai_inv;
-                    dlij = 0.0;
-                }
-
-                lij2     = lij*lij;
-                lij3     = lij2*lij;
-
-                uij      = 1.0/(dr+sk);
-                uij2     = uij*uij;
-                uij3     = uij2*uij;
-
-                diff2    = uij2-lij2;
-
-                lij_inv  = gmx::invsqrt(lij2);
-                sk2      = sk*sk;
-                sk2_rinv = sk2*rinv;
-                prod     = 0.25*sk2_rinv;
-
-                log_term = std::log(uij*lij_inv);
-
-                tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term +
-                    prod*(-diff2);
-
-                if (rai < sk-dr)
-                {
-                    tmp = tmp + 2.0 * (rai_inv-lij);
-                }
-
-                t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
-                t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
-                t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
-
-                dadx_val = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */
-                /* fr->dadx[n++] = (dlij*t1+duij*t2+t3)*rinv; */
-                /* rb2 is moved to chainrule    */
-
-                sum_ai += 0.5*tmp;
-            }
-            else
-            {
-                dadx_val = 0.0;
-            }
-            fr->dadx[n++] = dadx_val;
-
-
-            /* ai -> aj interaction */
-            if (raj < dr + sk_ai)
-            {
-                lij     = 1.0/(dr-sk_ai);
-                dlij    = 1.0;
-                raj_inv = 1.0/raj;
-
-                if (raj > dr-sk_ai)
-                {
-                    lij  = raj_inv;
-                    dlij = 0.0;
-                }
-
-                lij2     = lij  * lij;
-                lij3     = lij2 * lij;
-
-                uij      = 1.0/(dr+sk_ai);
-                uij2     = uij  * uij;
-                uij3     = uij2 * uij;
-
-                diff2    = uij2-lij2;
-
-                lij_inv  = gmx::invsqrt(lij2);
-                sk2      =  sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
-                sk2_rinv = sk2*rinv;
-                prod     = 0.25 * sk2_rinv;
-
-                /* log_term = table_log(uij*lij_inv,born->log_table,
-                   LOG_TABLE_ACCURACY); */
-                log_term = std::log(uij*lij_inv);
-
-                tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term +
-                    prod*(-diff2);
-
-                if (raj < sk_ai-dr)
-                {
-                    tmp     = tmp + 2.0 * (raj_inv-lij);
-                }
-
-                /* duij = 1.0 */
-                t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
-                t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
-                t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
-
-                dadx_val = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule    */
-                /* fr->dadx[n++] = (dlij*t1+duij*t2+t3)*rinv; */ /* rb2 is moved to chainrule    */
-
-                born->gpol_hct_work[aj] += 0.5*tmp;
-            }
-            else
-            {
-                dadx_val = 0.0;
-            }
-            fr->dadx[n++] = dadx_val;
-        }
-
-        born->gpol_hct_work[ai] += sum_ai;
-    }
-
-    /* Parallel summations */
-    if (DOMAINDECOMP(cr))
-    {
-        dd_atom_sum_real(cr->dd, born->gpol_hct_work);
-    }
-
-    for (i = 0; i < fr->natoms_force; i++) /* PELA born->nr */
-    {
-        if (born->use[i] != 0)
-        {
-            rai     = top->atomtypes.gb_radius[md->typeA[i]]-doffset;
-            sum_ai  = 1.0/rai - born->gpol_hct_work[i];
-            min_rad = rai + doffset;
-            rad     = 1.0/sum_ai;
-
-            born->bRad[i]   = std::max(rad, min_rad);
-            fr->invsqrta[i] = gmx::invsqrt(born->bRad[i]);
-        }
-    }
-
-    /* Extra communication required for DD */
-    if (DOMAINDECOMP(cr))
-    {
-        dd_atom_spread_real(cr->dd, born->bRad);
-        dd_atom_spread_real(cr->dd, fr->invsqrta);
-    }
-
-
-    return 0;
-}
-
-static int
-calc_gb_rad_obc(t_commrec *cr, t_forcerec *fr, gmx_localtop_t *top,
-                rvec x[], t_nblist *nl, gmx_genborn_t *born, t_mdatoms *md)
-{
-    int   i, k, ai, aj, nj0, nj1, n;
-    int   shift;
-    real  shX, shY, shZ;
-    real  rai, raj, dr2, dr, sk, sk2, lij, uij, diff2, tmp, sum_ai;
-    real  sum_ai2, sum_ai3, tsum, tchain, rinv, rai_inv, lij_inv, rai_inv2;
-    real  log_term, prod, sk2_rinv, sk_ai, sk2_ai;
-    real  ix1, iy1, iz1, jx1, jy1, jz1, dx11, dy11, dz11;
-    real  lij2, uij2, lij3, uij3, dlij, t1, t2, t3;
-    real  doffset, raj_inv, dadx_val;
-    real *gb_radius;
-
-    /* Keep the compiler happy */
-    n    = 0;
-
-    doffset   = born->gb_doffset;
-    gb_radius = born->gb_radius;
-
-    for (i = 0; i < born->nr; i++)
-    {
-        born->gpol_hct_work[i] = 0;
-    }
-
-    for (i = 0; i < nl->nri; i++)
-    {
-        ai      = nl->iinr[i];
-
-        nj0     = nl->jindex[i];
-        nj1     = nl->jindex[i+1];
-
-        /* Load shifts for this list */
-        shift   = nl->shift[i];
-        shX     = fr->shift_vec[shift][0];
-        shY     = fr->shift_vec[shift][1];
-        shZ     = fr->shift_vec[shift][2];
-
-        rai      = gb_radius[ai];
-        rai_inv  = 1.0/rai;
-
-        sk_ai    = born->param[ai];
-        sk2_ai   = sk_ai*sk_ai;
-
-        /* Load atom i coordinates, add shift vectors */
-        ix1      = shX + x[ai][0];
-        iy1      = shY + x[ai][1];
-        iz1      = shZ + x[ai][2];
-
-        sum_ai   = 0;
-
-        for (k = nj0; k < nj1 && nl->jjnr[k] >= 0; k++)
-        {
-            aj    = nl->jjnr[k];
-
-            jx1   = x[aj][0];
-            jy1   = x[aj][1];
-            jz1   = x[aj][2];
-
-            dx11  = ix1 - jx1;
-            dy11  = iy1 - jy1;
-            dz11  = iz1 - jz1;
-
-            dr2   = dx11*dx11+dy11*dy11+dz11*dz11;
-            rinv  = gmx::invsqrt(dr2);
-            dr    = dr2*rinv;
-
-            /* sk is precalculated in init_gb() */
-            sk    = born->param[aj];
-            raj   = gb_radius[aj];
-
-            /* aj -> ai interaction */
-            if (rai < dr+sk)
-            {
-                lij       = 1.0/(dr-sk);
-                dlij      = 1.0;
-
-                if (rai > dr-sk)
-                {
-                    lij  = rai_inv;
-                    dlij = 0.0;
-                }
-
-                uij      = 1.0/(dr+sk);
-                lij2     = lij  * lij;
-                lij3     = lij2 * lij;
-                uij2     = uij  * uij;
-                uij3     = uij2 * uij;
-
-                diff2    = uij2-lij2;
-
-                lij_inv  = gmx::invsqrt(lij2);
-                sk2      = sk*sk;
-                sk2_rinv = sk2*rinv;
-                prod     = 0.25*sk2_rinv;
-
-                log_term = std::log(uij*lij_inv);
-
-                tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
-
-                if (rai < sk-dr)
-                {
-                    tmp = tmp + 2.0 * (rai_inv-lij);
-                }
-
-                /* duij    = 1.0; */
-                t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
-                t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
-                t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
-
-                dadx_val = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule    */
-
-                sum_ai += 0.5*tmp;
-            }
-            else
-            {
-                dadx_val = 0.0;
-            }
-            fr->dadx[n++] = dadx_val;
-
-            /* ai -> aj interaction */
-            if (raj < dr + sk_ai)
-            {
-                lij     = 1.0/(dr-sk_ai);
-                dlij    = 1.0;
-                raj_inv = 1.0/raj;
-
-                if (raj > dr-sk_ai)
-                {
-                    lij  = raj_inv;
-                    dlij = 0.0;
-                }
-
-                lij2     = lij  * lij;
-                lij3     = lij2 * lij;
-
-                uij      = 1.0/(dr+sk_ai);
-                uij2     = uij  * uij;
-                uij3     = uij2 * uij;
-
-                diff2    = uij2-lij2;
-
-                lij_inv  = gmx::invsqrt(lij2);
-                sk2      =  sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
-                sk2_rinv = sk2*rinv;
-                prod     = 0.25 * sk2_rinv;
-
-                /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
-                log_term = std::log(uij*lij_inv);
-
-                tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
-
-                if (raj < sk_ai-dr)
-                {
-                    tmp     = tmp + 2.0 * (raj_inv-lij);
-                }
-
-                t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
-                t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
-                t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
-
-                dadx_val = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule    */
-
-                born->gpol_hct_work[aj] += 0.5*tmp;
-
-            }
-            else
-            {
-                dadx_val = 0.0;
-            }
-            fr->dadx[n++] = dadx_val;
-
-        }
-        born->gpol_hct_work[ai] += sum_ai;
-
-    }
-
-    /* Parallel summations */
-    if (DOMAINDECOMP(cr))
-    {
-        dd_atom_sum_real(cr->dd, born->gpol_hct_work);
-    }
-
-    for (i = 0; i < fr->natoms_force; i++) /* PELA born->nr */
-    {
-        if (born->use[i] != 0)
-        {
-            rai        = top->atomtypes.gb_radius[md->typeA[i]];
-            rai_inv2   = 1.0/rai;
-            rai        = rai-doffset;
-            rai_inv    = 1.0/rai;
-            sum_ai     = rai * born->gpol_hct_work[i];
-            sum_ai2    = sum_ai  * sum_ai;
-            sum_ai3    = sum_ai2 * sum_ai;
-
-            tsum          = tanh(born->obc_alpha*sum_ai-born->obc_beta*sum_ai2+born->obc_gamma*sum_ai3);
-            born->bRad[i] = rai_inv - tsum*rai_inv2;
-            born->bRad[i] = 1.0 / born->bRad[i];
-
-            fr->invsqrta[i] = gmx::invsqrt(born->bRad[i]);
-
-            tchain         = rai * (born->obc_alpha-2*born->obc_beta*sum_ai+3*born->obc_gamma*sum_ai2);
-            born->drobc[i] = (1.0-tsum*tsum)*tchain*rai_inv2;
-        }
-    }
-
-    /* Extra (local) communication required for DD */
-    if (DOMAINDECOMP(cr))
-    {
-        dd_atom_spread_real(cr->dd, born->bRad);
-        dd_atom_spread_real(cr->dd, fr->invsqrta);
-        dd_atom_spread_real(cr->dd, born->drobc);
-    }
-
-    return 0;
-
-}
-
-
-
-int calc_gb_rad(t_commrec *cr, t_forcerec *fr, t_inputrec *ir, gmx_localtop_t *top,
-                rvec x[], t_nblist *nl, gmx_genborn_t *born, t_mdatoms *md, t_nrnb     *nrnb)
-{
-    int   cnt;
-    int   ndadx;
-
-    if (fr->bAllvsAll && fr->dadx == nullptr)
-    {
-        /* We might need up to 8 atoms of padding before and after,
-         * and another 4 units to guarantee SSE alignment.
-         */
-        fr->nalloc_dadx = 2*(md->homenr+12)*(md->nr/2+1+12);
-        snew(fr->dadx_rawptr, fr->nalloc_dadx);
-        fr->dadx = (real *) (((size_t) fr->dadx_rawptr + 16) & (~((size_t) 15)));
-    }
-    else
-    {
-        /* In the SSE-enabled gb-loops, when writing to dadx, we
-         * always write 2*4 elements at a time, even in the case with only
-         * 1-3 j particles, where we only really need to write 2*(1-3)
-         * elements. This is because we want dadx to be aligned to a 16-
-         * byte boundary, and being able to use _mm_store/load_ps
-         */
-        ndadx = 2 * (nl->nrj + 3*nl->nri);
-
-        /* First, reallocate the dadx array, we need 3 extra for SSE */
-        if (ndadx + 3 > fr->nalloc_dadx)
-        {
-            fr->nalloc_dadx = over_alloc_large(ndadx) + 3;
-            srenew(fr->dadx_rawptr, fr->nalloc_dadx);
-            fr->dadx = (real *) (((size_t) fr->dadx_rawptr + 16) & (~((size_t) 15)));
-        }
-    }
-
-    if (fr->bAllvsAll)
-    {
-        cnt = md->homenr*(md->nr/2+1);
-
-        if (ir->gb_algorithm == egbSTILL)
-        {
-            genborn_allvsall_calc_still_radii(fr, md, born, top, x[0], &fr->AllvsAll_workgb);
-            /* 13 flops in outer loop, 47 flops in inner loop */
-            inc_nrnb(nrnb, eNR_BORN_AVA_RADII_STILL, md->homenr*13+cnt*47);
-        }
-        else if (ir->gb_algorithm == egbHCT || ir->gb_algorithm == egbOBC)
-        {
-            genborn_allvsall_calc_hct_obc_radii(fr, md, born, ir->gb_algorithm, top, x[0], &fr->AllvsAll_workgb);
-            /* 24 flops in outer loop, 183 in inner */
-            inc_nrnb(nrnb, eNR_BORN_AVA_RADII_HCT_OBC, md->homenr*24+cnt*183);
-        }
-        else
-        {
-            gmx_fatal(FARGS, "Bad gb algorithm for all-vs-all interactions");
-        }
-        return 0;
-    }
-
-    /* Switch for determining which algorithm to use for Born radii calculation */
-#if GMX_DOUBLE
-
-    switch (ir->gb_algorithm)
-    {
-        case egbSTILL:
-            calc_gb_rad_still(cr, fr, top, x, nl, born, md);
-            break;
-        case egbHCT:
-            calc_gb_rad_hct(cr, fr, top, x, nl, born, md);
-            break;
-        case egbOBC:
-            calc_gb_rad_obc(cr, fr, top, x, nl, born, md);
-            break;
-
-        default:
-            gmx_fatal(FARGS, "Unknown double precision algorithm for Born radii calculation: %d", ir->gb_algorithm);
-    }
-
-#else
-
-    switch (ir->gb_algorithm)
-    {
-        case egbSTILL:
-            calc_gb_rad_still(cr, fr, top, x, nl, born, md);
-            break;
-        case egbHCT:
-            calc_gb_rad_hct(cr, fr, top, x, nl, born, md);
-            break;
-        case egbOBC:
-            calc_gb_rad_obc(cr, fr, top, x, nl, born, md);
-            break;
-
-        default:
-            gmx_fatal(FARGS, "Unknown algorithm for Born radii calculation: %d", ir->gb_algorithm);
-    }
-
-#endif /* Double or single precision */
-
-    if (fr->bAllvsAll == FALSE)
-    {
-        switch (ir->gb_algorithm)
-        {
-            case egbSTILL:
-                /* 17 flops per outer loop iteration, 47 flops per inner loop */
-                inc_nrnb(nrnb, eNR_BORN_RADII_STILL, nl->nri*17+nl->nrj*47);
-                break;
-            case egbHCT:
-            case egbOBC:
-                /* 61 (assuming 10 for tanh) flops for outer loop iteration, 183 flops per inner loop */
-                inc_nrnb(nrnb, eNR_BORN_RADII_HCT_OBC, nl->nri*61+nl->nrj*183);
-                break;
-
-            default:
-                break;
-        }
-    }
-
-    return 0;
-}
-
-
-
-real gb_bonds_tab(rvec x[], rvec f[], rvec fshift[], real *charge, real *p_gbtabscale,
-                  real *invsqrta, real *dvda, real *GBtab, t_idef *idef, real epsilon_r,
-                  real gb_epsilon_solvent, real facel, const t_pbc *pbc, const t_graph *graph)
-{
-    int      i, j, n0, m, nnn, ai, aj;
-    int      ki;
-
-    real     isai, isaj;
-    real     r, rsq11;
-    real     rinv11, iq;
-    real     isaprod, qq, gbscale, gbtabscale, Y, F, Geps, Heps2, Fp, VV, FF, rt, eps, eps2;
-    real     vgb, fgb, fijC, dvdatmp, fscal;
-    real     vctot;
-
-    rvec     dx;
-    ivec     dt;
-
-    t_iatom *forceatoms;
-
-    /* Scale the electrostatics by gb_epsilon_solvent */
-    facel = facel * ((1.0/epsilon_r) - 1.0/gb_epsilon_solvent);
-
-    gbtabscale = *p_gbtabscale;
-    vctot      = 0.0;
-
-    for (j = F_GB12; j <= F_GB14; j++)
-    {
-        forceatoms = idef->il[j].iatoms;
-
-        for (i = 0; i < idef->il[j].nr; )
-        {
-            /* To avoid reading in the interaction type, we just increment i to pass over
-             * the types in the forceatoms array, this saves some memory accesses
-             */
-            i++;
-            ai            = forceatoms[i++];
-            aj            = forceatoms[i++];
-
-            ki            = pbc_rvec_sub(pbc, x[ai], x[aj], dx);
-            rsq11         = iprod(dx, dx);
-
-            isai          = invsqrta[ai];
-            iq            = (-1)*facel*charge[ai];
-
-            rinv11        = gmx::invsqrt(rsq11);
-            isaj          = invsqrta[aj];
-            isaprod       = isai*isaj;
-            qq            = isaprod*iq*charge[aj];
-            gbscale       = isaprod*gbtabscale;
-            r             = rsq11*rinv11;
-            rt            = r*gbscale;
-            n0            = static_cast<int>(rt);
-            eps           = rt-n0;
-            eps2          = eps*eps;
-            nnn           = 4*n0;
-            Y             = GBtab[nnn];
-            F             = GBtab[nnn+1];
-            Geps          = eps*GBtab[nnn+2];
-            Heps2         = eps2*GBtab[nnn+3];
-            Fp            = F+Geps+Heps2;
-            VV            = Y+eps*Fp;
-            FF            = Fp+Geps+2.0*Heps2;
-            vgb           = qq*VV;
-            fijC          = qq*FF*gbscale;
-            dvdatmp       = -(vgb+fijC*r)*0.5;
-            dvda[aj]      = dvda[aj] + dvdatmp*isaj*isaj;
-            dvda[ai]      = dvda[ai] + dvdatmp*isai*isai;
-            vctot         = vctot + vgb;
-            fgb           = -(fijC)*rinv11;
-
-            if (graph)
-            {
-                ivec_sub(SHIFT_IVEC(graph, ai), SHIFT_IVEC(graph, aj), dt);
-                ki = IVEC2IS(dt);
-            }
-
-            for (m = 0; (m < DIM); m++)             /*  15		*/
-            {
-                fscal               = fgb*dx[m];
-                f[ai][m]           += fscal;
-                f[aj][m]           -= fscal;
-                fshift[ki][m]      += fscal;
-                fshift[CENTRAL][m] -= fscal;
-            }
-        }
-    }
-
-    return vctot;
-}
-
-static real calc_gb_selfcorrections(t_commrec *cr, int natoms,
-                                    real *charge, gmx_genborn_t *born, real *dvda, double facel)
-{
-    int  i, ai, at0, at1;
-    real rai, e, derb, q, q2, fi, rai_inv, vtot;
-
-    if (DOMAINDECOMP(cr))
-    {
-        at0 = 0;
-        at1 = cr->dd->nat_home;
-    }
-    else
-    {
-        at0 = 0;
-        at1 = natoms;
-
-    }
-
-    /* Scale the electrostatics by gb_epsilon_solvent */
-    facel = facel * ((1.0/born->epsilon_r) - 1.0/born->gb_epsilon_solvent);
-
-    vtot = 0.0;
-
-    /* Apply self corrections */
-    for (i = at0; i < at1; i++)
-    {
-        ai       = i;
-
-        if (born->use[ai] == 1)
-        {
-            rai       = born->bRad[ai];
-            rai_inv   = 1.0/rai;
-            q         = charge[ai];
-            q2        = q*q;
-            fi        = facel*q2;
-            e         = fi*rai_inv;
-            derb      = 0.5*e*rai_inv*rai_inv;
-            dvda[ai] += derb*rai;
-            vtot     -= 0.5*e;
-        }
-    }
-
-    return vtot;
-
-}
-
-static real calc_gb_nonpolar(t_commrec *cr, t_forcerec *fr, int natoms, gmx_genborn_t *born, gmx_localtop_t *top,
-                             real *dvda, t_mdatoms *md)
-{
-    int  ai, i, at0, at1;
-    real e, es, rai, term, probe, tmp, factor;
-    real rbi_inv, rbi_inv2;
-
-    if (DOMAINDECOMP(cr))
-    {
-        at0 = 0;
-        at1 = cr->dd->nat_home;
-    }
-    else
-    {
-        at0 = 0;
-        at1 = natoms;
-    }
-
-    /* factor is the surface tension */
-    factor = born->sa_surface_tension;
-
-    es    = 0;
-    probe = 0.14;
-    term  = M_PI*4;
-
-    for (i = at0; i < at1; i++)
-    {
-        ai        = i;
-
-        if (born->use[ai] == 1)
-        {
-            rai       = top->atomtypes.gb_radius[md->typeA[ai]];
-            rbi_inv   = fr->invsqrta[ai];
-            rbi_inv2  = rbi_inv * rbi_inv;
-            tmp       = (rai*rbi_inv2)*(rai*rbi_inv2);
-            tmp       = tmp*tmp*tmp;
-            e         = factor*term*(rai+probe)*(rai+probe)*tmp;
-            dvda[ai]  = dvda[ai] - 6*e*rbi_inv2;
-            es        = es + e;
-        }
-    }
-
-    return es;
-}
-
-
-
-static real calc_gb_chainrule(int natoms, t_nblist *nl, real *dadx, real *dvda, rvec x[], rvec t[], rvec fshift[],
-                              rvec shift_vec[], int gb_algorithm, gmx_genborn_t *born)
-{
-    int          i, k, n, ai, aj, nj0, nj1, n0, n1;
-    int          shift;
-    real         shX, shY, shZ;
-    real         fgb, rbi, fix1, fiy1, fiz1;
-    real         ix1, iy1, iz1, jx1, jy1, jz1, dx11, dy11, dz11;
-    real         tx, ty, tz, rbai, rbaj, fgb_ai;
-    real        *rb;
-
-    n  = 0;
-    rb = born->work;
-
-    n0 = 0;
-    n1 = natoms;
-
-    if (gb_algorithm == egbSTILL)
-    {
-        for (i = n0; i < n1; i++)
-        {
-            rbi   = born->bRad[i];
-            rb[i] = (2 * rbi * rbi * dvda[i])/ONE_4PI_EPS0;
-        }
-    }
-    else if (gb_algorithm == egbHCT)
-    {
-        for (i = n0; i < n1; i++)
-        {
-            rbi   = born->bRad[i];
-            rb[i] = rbi * rbi * dvda[i];
-        }
-    }
-    else if (gb_algorithm == egbOBC)
-    {
-        for (i = n0; i < n1; i++)
-        {
-            rbi   = born->bRad[i];
-            rb[i] = rbi * rbi * born->drobc[i] * dvda[i];
-        }
-    }
-
-    for (i = 0; i < nl->nri; i++)
-    {
-        ai   = nl->iinr[i];
-
-        nj0  = nl->jindex[i];
-        nj1  = nl->jindex[i+1];
-
-        /* Load shifts for this list */
-        shift   = nl->shift[i];
-        shX     = shift_vec[shift][0];
-        shY     = shift_vec[shift][1];
-        shZ     = shift_vec[shift][2];
-
-        /* Load atom i coordinates, add shift vectors */
-        ix1  = shX + x[ai][0];
-        iy1  = shY + x[ai][1];
-        iz1  = shZ + x[ai][2];
-
-        fix1 = 0;
-        fiy1 = 0;
-        fiz1 = 0;
-
-        rbai = rb[ai];
-
-        for (k = nj0; k < nj1 && nl->jjnr[k] >= 0; k++)
-        {
-            aj = nl->jjnr[k];
-
-            jx1     = x[aj][0];
-            jy1     = x[aj][1];
-            jz1     = x[aj][2];
-
-            dx11    = ix1 - jx1;
-            dy11    = iy1 - jy1;
-            dz11    = iz1 - jz1;
-
-            rbaj    = rb[aj];
-
-            fgb     = rbai*dadx[n++];
-            fgb_ai  = rbaj*dadx[n++];
-
-            /* Total force between ai and aj is the sum of ai->aj and aj->ai */
-            fgb     = fgb + fgb_ai;
-
-            tx      = fgb * dx11;
-            ty      = fgb * dy11;
-            tz      = fgb * dz11;
-
-            fix1    = fix1 + tx;
-            fiy1    = fiy1 + ty;
-            fiz1    = fiz1 + tz;
-
-            /* Update force on atom aj */
-            t[aj][0] = t[aj][0] - tx;
-            t[aj][1] = t[aj][1] - ty;
-            t[aj][2] = t[aj][2] - tz;
-        }
-
-        /* Update force and shift forces on atom ai */
-        t[ai][0] = t[ai][0] + fix1;
-        t[ai][1] = t[ai][1] + fiy1;
-        t[ai][2] = t[ai][2] + fiz1;
-
-        fshift[shift][0] = fshift[shift][0] + fix1;
-        fshift[shift][1] = fshift[shift][1] + fiy1;
-        fshift[shift][2] = fshift[shift][2] + fiz1;
-
-    }
-
-    return 0;
-}
-
-
-void
-calc_gb_forces(t_commrec *cr, t_mdatoms *md, gmx_genborn_t *born, gmx_localtop_t *top,
-               rvec x[], rvec f[], t_forcerec *fr, t_idef *idef, int gb_algorithm, int sa_algorithm, t_nrnb *nrnb,
-               const t_pbc *pbc, const t_graph *graph, gmx_enerdata_t *enerd)
-{
-    int  cnt;
-
-    /* PBC or not? */
-    const t_pbc *pbc_null;
-
-    if (fr->bMolPBC)
-    {
-        pbc_null = pbc;
-    }
-    else
-    {
-        pbc_null = nullptr;
-    }
-
-    if (sa_algorithm == esaAPPROX)
-    {
-        /* Do a simple ACE type approximation for the non-polar solvation */
-        enerd->term[F_NPSOLVATION] += calc_gb_nonpolar(cr, fr, born->nr, born, top, fr->dvda, md);
-    }
-
-    /* Calculate the bonded GB-interactions using either table or analytical formula */
-    enerd->term[F_GBPOL]       += gb_bonds_tab(x, f, fr->fshift, md->chargeA, &(fr->gbtabscale),
-                                               fr->invsqrta, fr->dvda, fr->gbtab->data, idef, born->epsilon_r, born->gb_epsilon_solvent, fr->ic->epsfac, pbc_null, graph);
-
-    /* Calculate self corrections to the GB energies - currently only A state used! (FIXME) */
-    enerd->term[F_GBPOL]       += calc_gb_selfcorrections(cr, born->nr, md->chargeA, born, fr->dvda, fr->ic->epsfac);
-
-    /* If parallel, sum the derivative of the potential w.r.t the born radii */
-    if (DOMAINDECOMP(cr))
-    {
-        dd_atom_sum_real(cr->dd, fr->dvda);
-        dd_atom_spread_real(cr->dd, fr->dvda);
-    }
-
-    if (fr->bAllvsAll)
-    {
-        genborn_allvsall_calc_chainrule(fr, md, born, x[0], f[0], gb_algorithm, fr->AllvsAll_workgb);
-        cnt = md->homenr*(md->nr/2+1);
-        /* 9 flops for outer loop, 15 for inner */
-        inc_nrnb(nrnb, eNR_BORN_AVA_CHAINRULE, md->homenr*9+cnt*15);
-        return;
-    }
-
-    calc_gb_chainrule(fr->natoms_force, fr->gblist, fr->dadx, fr->dvda,
-                      x, f, fr->fshift, fr->shift_vec, gb_algorithm, born);
-
-    if (!fr->bAllvsAll)
-    {
-        /* 9 flops for outer loop, 15 for inner */
-        inc_nrnb(nrnb, eNR_BORN_CHAINRULE, fr->gblist->nri*9+fr->gblist->nrj*15);
-    }
-}
-
-static void add_j_to_gblist(gbtmpnbl_t *list, int aj)
-{
-    if (list->naj >= list->aj_nalloc)
-    {
-        list->aj_nalloc = over_alloc_large(list->naj+1);
-        srenew(list->aj, list->aj_nalloc);
-    }
-
-    list->aj[list->naj++] = aj;
-}
-
-static gbtmpnbl_t *find_gbtmplist(struct gbtmpnbls *lists, int shift)
-{
-    int ind, i;
-
-    /* Search the list with the same shift, if there is one */
-    ind = 0;
-    while (ind < lists->nlist && shift != lists->list[ind].shift)
-    {
-        ind++;
-    }
-    if (ind == lists->nlist)
-    {
-        if (lists->nlist == lists->list_nalloc)
-        {
-            lists->list_nalloc++;
-            srenew(lists->list, lists->list_nalloc);
-            for (i = lists->nlist; i < lists->list_nalloc; i++)
-            {
-                lists->list[i].aj        = nullptr;
-                lists->list[i].aj_nalloc = 0;
-            }
-
-        }
-
-        lists->list[lists->nlist].shift = shift;
-        lists->list[lists->nlist].naj   = 0;
-        lists->nlist++;
-    }
-
-    return &lists->list[ind];
-}
-
-static void add_bondeds_to_gblist(t_ilist *il,
-                                  gmx_bool bMolPBC, t_pbc *pbc, t_graph *g, rvec *x,
-                                  struct gbtmpnbls *nls)
-{
-    int         ind, j, ai, aj, found;
-    rvec        dx;
-    ivec        dt;
-    gbtmpnbl_t *list;
-
-    for (ind = 0; ind < il->nr; ind += 3)
-    {
-        ai = il->iatoms[ind+1];
-        aj = il->iatoms[ind+2];
-
-        int shift = CENTRAL;
-        if (g != nullptr)
-        {
-            rvec_sub(x[ai], x[aj], dx);
-            ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
-            shift = IVEC2IS(dt);
-        }
-        else if (bMolPBC)
-        {
-            shift = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
-        }
-
-        /* Find the list for this shift or create one */
-        list = find_gbtmplist(&nls[ai], shift);
-
-        found = 0;
-
-        /* So that we do not add the same bond twice.
-         * This happens with some constraints between 1-3 atoms
-         * that are in the bond-list but should not be in the GB nb-list */
-        for (j = 0; j < list->naj; j++)
-        {
-            if (list->aj[j] == aj)
-            {
-                found = 1;
-            }
-        }
-
-        if (found == 0)
-        {
-            if (ai == aj)
-            {
-                gmx_incons("ai == aj");
-            }
-
-            add_j_to_gblist(list, aj);
-        }
-    }
-}
-
-
-int make_gb_nblist(t_commrec *cr, int gb_algorithm,
-                   rvec x[], matrix box,
-                   t_forcerec *fr, t_idef *idef, t_graph *graph, gmx_genborn_t *born)
-{
-    int               i, j, k, n, nj0, nj1, ai, shift, s;
-    t_nblist         *nblist;
-    t_pbc             pbc;
-
-    struct gbtmpnbls *nls;
-    gbtmpnbl_t       *list = nullptr;
-
-    set_pbc(&pbc, fr->ePBC, box);
-    nls   = born->nblist_work;
-
-    for (i = 0; i < born->nr; i++)
-    {
-        nls[i].nlist = 0;
-    }
-
-    if (fr->bMolPBC)
-    {
-        set_pbc_dd(&pbc, fr->ePBC, cr->dd->nc, TRUE, box);
-    }
-
-    switch (gb_algorithm)
-    {
-        case egbHCT:
-        case egbOBC:
-            /* Loop over 1-2, 1-3 and 1-4 interactions */
-            for (j = F_GB12; j <= F_GB14; j++)
-            {
-                add_bondeds_to_gblist(&idef->il[j], fr->bMolPBC, &pbc, graph, x, nls);
-            }
-            break;
-        case egbSTILL:
-            /* Loop over 1-4 interactions */
-            add_bondeds_to_gblist(&idef->il[F_GB14], fr->bMolPBC, &pbc, graph, x, nls);
-            break;
-        default:
-            gmx_incons("Unknown GB algorithm");
-    }
-
-    /* Loop over the VDWQQ and VDW nblists to set up the nonbonded part of the GB list */
-    for (n = 0; (n < fr->nnblists); n++)
-    {
-        for (i = 0; (i < eNL_NR); i++)
-        {
-            nblist = &(fr->nblists[n].nlist_sr[i]);
-
-            if (nblist->nri > 0 && (i == eNL_VDWQQ || i == eNL_QQ))
-            {
-                for (j = 0; j < nblist->nri; j++)
-                {
-                    ai    = nblist->iinr[j];
-                    shift = nblist->shift[j];
-
-                    /* Find the list for this shift or create one */
-                    list = find_gbtmplist(&nls[ai], shift);
-
-                    nj0 = nblist->jindex[j];
-                    nj1 = nblist->jindex[j+1];
-
-                    /* Add all the j-atoms in the non-bonded list to the GB list */
-                    for (k = nj0; k < nj1; k++)
-                    {
-                        add_j_to_gblist(list, nblist->jjnr[k]);
-                    }
-                }
-            }
-        }
-    }
-
-    /* Zero out some counters */
-    fr->gblist->nri = 0;
-    fr->gblist->nrj = 0;
-
-    fr->gblist->jindex[0] = fr->gblist->nri;
-
-    for (i = 0; i < fr->natoms_force; i++)
-    {
-        for (s = 0; s < nls[i].nlist; s++)
-        {
-            list = &nls[i].list[s];
-
-            /* Only add those atoms that actually have neighbours */
-            if (born->use[i] != 0)
-            {
-                fr->gblist->iinr[fr->gblist->nri]  = i;
-                fr->gblist->shift[fr->gblist->nri] = list->shift;
-                fr->gblist->nri++;
-
-                for (k = 0; k < list->naj; k++)
-                {
-                    /* Memory allocation for jjnr */
-                    if (fr->gblist->nrj >= fr->gblist->maxnrj)
-                    {
-                        fr->gblist->maxnrj += over_alloc_large(fr->gblist->maxnrj);
-
-                        if (debug)
-                        {
-                            fprintf(debug, "Increasing GB neighbourlist j size to %d\n", fr->gblist->maxnrj);
-                        }
-
-                        srenew(fr->gblist->jjnr, fr->gblist->maxnrj);
-                    }
-
-                    /* Put in list */
-                    if (i == list->aj[k])
-                    {
-                        gmx_incons("i == list->aj[k]");
-                    }
-                    fr->gblist->jjnr[fr->gblist->nrj++] = list->aj[k];
-                }
-
-                fr->gblist->jindex[fr->gblist->nri] = fr->gblist->nrj;
-            }
-        }
-    }
-
-    return 0;
-}
-
-void make_local_gb(const t_commrec *cr, gmx_genborn_t *born, int gb_algorithm)
-{
-    int           i, at0, at1;
-    gmx_domdec_t *dd = nullptr;
-
-    if (DOMAINDECOMP(cr))
-    {
-        dd  = cr->dd;
-        at0 = 0;
-        at1 = dd->nat_tot;
-    }
-    else
-    {
-        /* Single node, just copy pointers and return */
-        if (gb_algorithm == egbSTILL)
-        {
-            born->gpol      = born->gpol_globalindex;
-            born->vsolv     = born->vsolv_globalindex;
-            born->gb_radius = born->gb_radius_globalindex;
-        }
-        else
-        {
-            born->param     = born->param_globalindex;
-            born->gb_radius = born->gb_radius_globalindex;
-        }
-
-        born->use = born->use_globalindex;
-
-        return;
-    }
-
-    /* Reallocation of local arrays if necessary */
-    /* fr->natoms_force is equal to dd->nat_tot */
-    if (DOMAINDECOMP(cr) && dd->nat_tot > born->nalloc)
-    {
-        int nalloc;
-
-        nalloc = dd->nat_tot;
-
-        /* Arrays specific to different gb algorithms */
-        if (gb_algorithm == egbSTILL)
-        {
-            srenew(born->gpol,  nalloc+3);
-            srenew(born->vsolv, nalloc+3);
-            srenew(born->gb_radius, nalloc+3);
-            for (i = born->nalloc; (i < nalloc+3); i++)
-            {
-                born->gpol[i]      = 0;
-                born->vsolv[i]     = 0;
-                born->gb_radius[i] = 0;
-            }
-        }
-        else
-        {
-            srenew(born->param, nalloc+3);
-            srenew(born->gb_radius, nalloc+3);
-            for (i = born->nalloc; (i < nalloc+3); i++)
-            {
-                born->param[i]     = 0;
-                born->gb_radius[i] = 0;
-            }
-        }
-
-        /* All gb-algorithms use the array for vsites exclusions */
-        srenew(born->use,    nalloc+3);
-        for (i = born->nalloc; (i < nalloc+3); i++)
-        {
-            born->use[i] = 0;
-        }
-
-        born->nalloc = nalloc;
-    }
-
-    /* With dd, copy algorithm specific arrays */
-    if (gb_algorithm == egbSTILL)
-    {
-        for (i = at0; i < at1; i++)
-        {
-            born->gpol[i]      = born->gpol_globalindex[dd->gatindex[i]];
-            born->vsolv[i]     = born->vsolv_globalindex[dd->gatindex[i]];
-            born->gb_radius[i] = born->gb_radius_globalindex[dd->gatindex[i]];
-            born->use[i]       = born->use_globalindex[dd->gatindex[i]];
-        }
-    }
-    else
-    {
-        for (i = at0; i < at1; i++)
-        {
-            born->param[i]     = born->param_globalindex[dd->gatindex[i]];
-            born->gb_radius[i] = born->gb_radius_globalindex[dd->gatindex[i]];
-            born->use[i]       = born->use_globalindex[dd->gatindex[i]];
-        }
-    }
-}
diff --git a/src/gromacs/mdlib/genborn.h b/src/gromacs/mdlib/genborn.h
deleted file mode 100644
index a631788a0d..0000000000
--- a/src/gromacs/mdlib/genborn.h
+++ /dev/null
@@ -1,170 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2008, The GROMACS development team.
- * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#ifndef GMX_MDLIB_GENBORN_H
-#define GMX_MDLIB_GENBORN_H
-
-#include "gromacs/math/utilities.h"
-#include "gromacs/math/vectypes.h"
-
-struct gmx_genborn_t;
-struct gmx_enerdata_t;
-struct gmx_localtop_t;
-struct gmx_mtop_t;
-struct t_commrec;
-struct t_forcerec;
-struct t_graph;
-struct t_idef;
-struct t_inputrec;
-struct t_mdatoms;
-struct t_nblist;
-struct t_nrnb;
-struct t_pbc;
-
-typedef struct
-{
-    int  nbonds;
-    int  bond[10];
-    real length[10];
-} genborn_bonds_t;
-
-typedef struct gbtmpnbls *gbtmpnbls_t;
-
-/* Struct to hold all the information for GB */
-typedef struct gmx_genborn_t
-{
-    int nr;                   /* number of atoms, length of arrays below */
-    int n12;                  /* number of 1-2 (bond) interactions       */
-    int n13;                  /* number of 1-3 (angle) terms             */
-    int n14;                  /* number of 1-4 (torsion) terms           */
-    int nalloc;               /* Allocation of local arrays (with DD)    */
-
-
-    /* Arrays below that end with _globalindex are used for setting up initial values of
-     * all gb parameters and values. They all have length natoms, which for DD is the
-     * global atom number.
-     * Values are then taken from these arrays to local copies, that have names without
-     * _globalindex, in the routine make_local_gb(), which is called once for single
-     * node runs, and for DD at every call to dd_partition_system
-     */
-
-    real       *gpol;              /* Atomic polarisation energies */
-    real       *gpol_globalindex;  /*  */
-    real       *gpol_still_work;   /* Work array for Still model */
-    real       *gpol_hct_work;     /* Work array for HCT/OBC models */
-    real       *bRad;              /* Atomic Born radii */
-    real       *vsolv;             /* Atomic solvation volumes */
-    real       *vsolv_globalindex; /*  */
-    real       *gb_radius;         /* Radius info, copied from atomtypes */
-    real       *gb_radius_globalindex;
-
-    int        *use;                /* Array that till if this atom does GB */
-    int        *use_globalindex;    /* Global array for parallelization */
-
-    real        es;                 /* Solvation energy and derivatives */
-    real       *asurf;              /* Atomic surface area */
-    rvec       *dasurf;             /* Surface area derivatives */
-    real        as;                 /* Total surface area */
-
-    real       *drobc;              /* Parameters for OBC chain rule calculation */
-    real       *param;              /* Precomputed factor rai*atype->S_hct for HCT/OBC */
-    real       *param_globalindex;  /*  */
-
-    real       *log_table;          /* Table for logarithm lookup */
-
-    real        obc_alpha;          /* OBC parameters */
-    real        obc_beta;           /* OBC parameters */
-    real        obc_gamma;          /* OBC parameters */
-    real        gb_doffset;         /* Dielectric offset for Still/HCT/OBC */
-    real        gb_epsilon_solvent; /*   */
-    real        epsilon_r;          /* Used for inner dielectric */
-
-    real        sa_surface_tension; /* Surface tension for non-polar solvation */
-
-    real       *work;               /* Used for parallel summation and in the chain rule, length natoms         */
-    real       *buf;                /* Used for parallel summation and in the chain rule, length natoms         */
-    int        *count;              /* Used for setting up the special gb nblist, length natoms                 */
-    gbtmpnbls_t nblist_work;        /* Used for setting up the special gb nblist, dim natoms*nblist_work_nalloc */
-    int         nblist_work_nalloc; /* Length of second dimension of nblist_work                                */
-}
-gmx_genborn_t;
-/* Still parameters - make sure to edit in genborn_sse.c too if you change these! */
-#define STILL_P1  0.073*0.1              /* length        */
-#define STILL_P2  0.921*0.1*CAL2JOULE    /* energy*length */
-#define STILL_P3  6.211*0.1*CAL2JOULE    /* energy*length */
-#define STILL_P4  15.236*0.1*CAL2JOULE
-#define STILL_P5  1.254
-
-#define STILL_P5INV (1.0/STILL_P5)
-#define STILL_PIP5  (M_PI*STILL_P5)
-
-
-/* Initialise GB stuff */
-int init_gb(struct gmx_genborn_t **p_born,
-            struct t_forcerec *fr, const struct t_inputrec *ir,
-            const gmx_mtop_t *mtop, int gb_algorithm);
-
-
-/* Born radii calculations, both with and without SSE acceleration */
-int calc_gb_rad(struct t_commrec *cr, struct t_forcerec *fr, struct t_inputrec *ir, gmx_localtop_t *top, rvec x[], t_nblist *nl, struct gmx_genborn_t *born, t_mdatoms *md, t_nrnb     *nrnb);
-
-
-
-/* Bonded GB interactions */
-real gb_bonds_tab(rvec x[], rvec f[], rvec fshift[], real *charge, real *p_gbtabscale,
-                  real *invsqrta, real *dvda, real *GBtab, t_idef *idef, real epsilon_r,
-                  real gb_epsilon_solvent, real facel, const struct t_pbc *pbc,
-                  const struct t_graph *graph);
-
-
-
-
-/* Functions for calculating adjustments due to ie chain rule terms */
-void
-calc_gb_forces(struct t_commrec *cr, t_mdatoms *md, struct gmx_genborn_t *born, gmx_localtop_t *top,
-               rvec x[], rvec f[], struct t_forcerec *fr, t_idef *idef, int gb_algorithm, int sa_algorithm, t_nrnb *nrnb,
-               const struct t_pbc *pbc, const struct t_graph *graph, struct gmx_enerdata_t *enerd);
-
-
-int
-make_gb_nblist(struct t_commrec *cr, int gb_algorithm,
-               rvec x[], matrix box,
-               struct t_forcerec *fr, t_idef *idef, struct t_graph *graph, struct gmx_genborn_t *born);
-
-void
-make_local_gb(const struct t_commrec *cr, struct gmx_genborn_t *born, int gb_algorithm);
-
-#endif
diff --git a/src/gromacs/mdlib/genborn_allvsall.cpp b/src/gromacs/mdlib/genborn_allvsall.cpp
deleted file mode 100644
index 1d96860895..0000000000
--- a/src/gromacs/mdlib/genborn_allvsall.cpp
+++ /dev/null
@@ -1,1108 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team.
- * Copyright (c) 2010,2014,2015,2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#include "gmxpre.h"
-
-#include "genborn_allvsall.h"
-
-#include <cmath>
-
-#include <algorithm>
-
-#include "gromacs/gmxlib/network.h"
-#include "gromacs/math/functions.h"
-#include "gromacs/math/units.h"
-#include "gromacs/math/vec.h"
-#include "gromacs/mdlib/genborn.h"
-#include "gromacs/mdtypes/forcerec.h"
-#include "gromacs/mdtypes/md_enums.h"
-#include "gromacs/mdtypes/mdatom.h"
-#include "gromacs/topology/topology.h"
-#include "gromacs/utility/smalloc.h"
-
-
-typedef struct
-{
-    int *      jindex_gb;
-    int **     exclusion_mask_gb;
-}
-gmx_allvsallgb2_data_t;
-
-static int
-calc_maxoffset(int i, int natoms)
-{
-    int maxoffset;
-
-    if ((natoms % 2) == 1)
-    {
-        /* Odd number of atoms, easy */
-        maxoffset = natoms/2;
-    }
-    else if ((natoms % 4) == 0)
-    {
-        /* Multiple of four is hard */
-        if (i < natoms/2)
-        {
-            if ((i % 2) == 0)
-            {
-                maxoffset = natoms/2;
-            }
-            else
-            {
-                maxoffset = natoms/2-1;
-            }
-        }
-        else
-        {
-            if ((i % 2) == 1)
-            {
-                maxoffset = natoms/2;
-            }
-            else
-            {
-                maxoffset = natoms/2-1;
-            }
-        }
-    }
-    else
-    {
-        /* natoms/2 = odd */
-        if ((i % 2) == 0)
-        {
-            maxoffset = natoms/2;
-        }
-        else
-        {
-            maxoffset = natoms/2-1;
-        }
-    }
-
-    return maxoffset;
-}
-
-static void
-setup_gb_exclusions_and_indices(gmx_allvsallgb2_data_t     *   aadata,
-                                t_ilist     *                  ilist,
-                                int                            natoms,
-                                gmx_bool                       bInclude12,
-                                gmx_bool                       bInclude13,
-                                gmx_bool                       bInclude14)
-{
-    int i, j, k;
-    int a1, a2;
-    int max_offset;
-    int max_excl_offset;
-
-    /* This routine can appear to be a bit complex, but it is mostly book-keeping.
-     * To enable the fast all-vs-all kernel we need to be able to stream through all coordinates
-     * whether they should interact or not.
-     *
-     * To avoid looping over the exclusions, we create a simple mask that is 1 if the interaction
-     * should be present, otherwise 0. Since exclusions typically only occur when i & j are close,
-     * we create a jindex array with three elements per i atom: the starting point, the point to
-     * which we need to check exclusions, and the end point.
-     * This way we only have to allocate a short exclusion mask per i atom.
-     */
-
-    /* Allocate memory for jindex arrays */
-    snew(aadata->jindex_gb, 3*natoms);
-
-    /* Pointer to lists with exclusion masks */
-    snew(aadata->exclusion_mask_gb, natoms);
-
-    for (i = 0; i < natoms; i++)
-    {
-        /* Start */
-        aadata->jindex_gb[3*i]       = i+1;
-        max_offset                   = calc_maxoffset(i, natoms);
-
-        /* first check the max range of atoms to EXCLUDE */
-        max_excl_offset = 0;
-        if (!bInclude12)
-        {
-            for (j = 0; j < ilist[F_GB12].nr; j += 3)
-            {
-                a1 = ilist[F_GB12].iatoms[j+1];
-                a2 = ilist[F_GB12].iatoms[j+2];
-
-                if (a1 == i)
-                {
-                    k = a2-a1;
-                }
-                else if (a2 == i)
-                {
-                    k = a1+natoms-a2;
-                }
-                else
-                {
-                    continue;
-                }
-                if (k > 0 && k <= max_offset)
-                {
-                    max_excl_offset = std::max(k, max_excl_offset);
-                }
-            }
-        }
-        if (!bInclude13)
-        {
-            for (j = 0; j < ilist[F_GB13].nr; j += 3)
-            {
-                a1 = ilist[F_GB13].iatoms[j+1];
-                a2 = ilist[F_GB13].iatoms[j+2];
-
-
-                if (a1 == i)
-                {
-                    k = a2-a1;
-                }
-                else if (a2 == i)
-                {
-                    k = a1+natoms-a2;
-                }
-                else
-                {
-                    continue;
-                }
-                if (k > 0 && k <= max_offset)
-                {
-                    max_excl_offset = std::max(k, max_excl_offset);
-                }
-            }
-        }
-        if (!bInclude14)
-        {
-            for (j = 0; j < ilist[F_GB14].nr; j += 3)
-            {
-                a1 = ilist[F_GB14].iatoms[j+1];
-                a2 = ilist[F_GB14].iatoms[j+2];
-
-
-                if (a1 == i)
-                {
-                    k = a2-a1;
-                }
-                else if (a2 == i)
-                {
-                    k = a1+natoms-a2;
-                }
-                else
-                {
-                    continue;
-                }
-                if (k > 0 && k <= max_offset)
-                {
-                    max_excl_offset = std::max(k, max_excl_offset);
-                }
-            }
-        }
-        max_excl_offset = std::min(max_offset, max_excl_offset);
-
-        aadata->jindex_gb[3*i+1] = i+1+max_excl_offset;
-
-        snew(aadata->exclusion_mask_gb[i], max_excl_offset);
-
-        /* Include everything by default */
-        for (j = 0; j < max_excl_offset; j++)
-        {
-            /* Use all-ones to mark interactions that should be present, compatible with SSE */
-            aadata->exclusion_mask_gb[i][j] = 0xFFFFFFFF;
-        }
-        /* Go through exclusions again */
-        if (!bInclude12)
-        {
-            for (j = 0; j < ilist[F_GB12].nr; j += 3)
-            {
-                a1 = ilist[F_GB12].iatoms[j+1];
-                a2 = ilist[F_GB12].iatoms[j+2];
-
-                if (a1 == i)
-                {
-                    k = a2-a1;
-                }
-                else if (a2 == i)
-                {
-                    k = a1+natoms-a2;
-                }
-                else
-                {
-                    continue;
-                }
-                if (k > 0 && k <= max_offset)
-                {
-                    aadata->exclusion_mask_gb[i][k-1] = 0;
-                }
-            }
-        }
-        if (!bInclude13)
-        {
-            for (j = 0; j < ilist[F_GB13].nr; j += 3)
-            {
-                a1 = ilist[F_GB13].iatoms[j+1];
-                a2 = ilist[F_GB13].iatoms[j+2];
-
-                if (a1 == i)
-                {
-                    k = a2-a1;
-                }
-                else if (a2 == i)
-                {
-                    k = a1+natoms-a2;
-                }
-                else
-                {
-                    continue;
-                }
-                if (k > 0 && k <= max_offset)
-                {
-                    aadata->exclusion_mask_gb[i][k-1] = 0;
-                }
-            }
-        }
-        if (!bInclude14)
-        {
-            for (j = 0; j < ilist[F_GB14].nr; j += 3)
-            {
-                a1 = ilist[F_GB14].iatoms[j+1];
-                a2 = ilist[F_GB14].iatoms[j+2];
-
-                if (a1 == i)
-                {
-                    k = a2-a1;
-                }
-                else if (a2 == i)
-                {
-                    k = a1+natoms-a2;
-                }
-                else
-                {
-                    continue;
-                }
-                if (k > 0 && k <= max_offset)
-                {
-                    aadata->exclusion_mask_gb[i][k-1] = 0;
-                }
-            }
-        }
-
-        /* End */
-
-        /* End */
-        aadata->jindex_gb[3*i+2] = i+1+max_offset;
-    }
-}
-
-
-static void
-genborn_allvsall_setup(gmx_allvsallgb2_data_t     **  p_aadata,
-                       t_ilist     *                  ilist,
-                       int                            natoms,
-                       gmx_bool                       bInclude12,
-                       gmx_bool                       bInclude13,
-                       gmx_bool                       bInclude14)
-{
-    gmx_allvsallgb2_data_t *aadata;
-
-    snew(aadata, 1);
-    *p_aadata = aadata;
-
-    setup_gb_exclusions_and_indices(aadata, ilist, natoms, bInclude12, bInclude13, bInclude14);
-}
-
-
-
-int
-genborn_allvsall_calc_still_radii(t_forcerec *           fr,
-                                  t_mdatoms *            mdatoms,
-                                  gmx_genborn_t *        born,
-                                  gmx_localtop_t *       top,
-                                  real *                 x,
-                                  void *                 work)
-{
-    gmx_allvsallgb2_data_t *aadata;
-    int                     natoms;
-    int                     ni0, ni1;
-    int                     nj0, nj1, nj2;
-    int                     i, j, k, n;
-    int              *      mask;
-
-    real                    ix, iy, iz;
-    real                    jx, jy, jz;
-    real                    dx, dy, dz;
-    real                    rsq, rinv;
-    real                    gpi, rai, vai;
-    real                    prod_ai;
-    real                    irsq, idr4, idr6;
-    real                    raj, rvdw, ratio;
-    real                    vaj, ccf, dccf, theta, cosq;
-    real                    term, prod, icf4, icf6, gpi2, factor, sinq;
-
-    natoms              = mdatoms->nr;
-    ni0                 = 0;
-    ni1                 = mdatoms->homenr;
-    factor              = 0.5*ONE_4PI_EPS0;
-    n                   = 0;
-
-    aadata = *((gmx_allvsallgb2_data_t **)work);
-
-    if (aadata == nullptr)
-    {
-        genborn_allvsall_setup(&aadata, top->idef.il, mdatoms->nr,
-                               FALSE, FALSE, TRUE);
-        *((gmx_allvsallgb2_data_t **)work) = aadata;
-    }
-
-
-    for (i = 0; i < born->nr; i++)
-    {
-        born->gpol_still_work[i] = 0;
-    }
-
-
-    for (i = ni0; i < ni1; i++)
-    {
-        /* We assume shifts are NOT used for all-vs-all interactions */
-
-        /* Load i atom data */
-        ix                = x[3*i];
-        iy                = x[3*i+1];
-        iz                = x[3*i+2];
-
-        gpi               = 0.0;
-
-        rai     = top->atomtypes.gb_radius[mdatoms->typeA[i]];
-        vai     = born->vsolv[i];
-        prod_ai = STILL_P4*vai;
-
-        /* Load limits for loop over neighbors */
-        nj0              = aadata->jindex_gb[3*i];
-        nj1              = aadata->jindex_gb[3*i+1];
-        nj2              = aadata->jindex_gb[3*i+2];
-
-        mask             = aadata->exclusion_mask_gb[i];
-
-        /* Prologue part, including exclusion mask */
-        for (j = nj0; j < nj1; j++, mask++)
-        {
-            if (*mask != 0)
-            {
-                k = j%natoms;
-
-                /* load j atom coordinates */
-                jx                = x[3*k];
-                jy                = x[3*k+1];
-                jz                = x[3*k+2];
-
-                /* Calculate distance */
-                dx                = ix - jx;
-                dy                = iy - jy;
-                dz                = iz - jz;
-                rsq               = dx*dx+dy*dy+dz*dz;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv              = gmx::invsqrt(rsq);
-                irsq              = rinv*rinv;
-                idr4              = irsq*irsq;
-                idr6              = idr4*irsq;
-
-                raj = top->atomtypes.gb_radius[mdatoms->typeA[k]];
-
-                rvdw  = rai + raj;
-
-                ratio = rsq / (rvdw * rvdw);
-                vaj   = born->vsolv[k];
-
-
-                if (ratio > STILL_P5INV)
-                {
-                    ccf  = 1.0;
-                    dccf = 0.0;
-                }
-                else
-                {
-                    theta = ratio*STILL_PIP5;
-                    cosq  = cos(theta);
-                    term  = 0.5*(1.0-cosq);
-                    ccf   = term*term;
-                    sinq  = 1.0 - cosq*cosq;
-                    dccf  = 2.0*term*sinq*gmx::invsqrt(sinq)*theta;
-                }
-
-                prod          = STILL_P4*vaj;
-                icf4          = ccf*idr4;
-                icf6          = (4*ccf-dccf)*idr6;
-
-                born->gpol_still_work[k] += prod_ai*icf4;
-                gpi                       = gpi+prod*icf4;
-
-                /* Save ai->aj and aj->ai chain rule terms */
-                fr->dadx[n++]   = prod*icf6;
-                fr->dadx[n++]   = prod_ai*icf6;
-
-                /* 27 flops, plus one cos(x) - estimate at 20 flops  => 47 */
-
-            }
-        }
-
-        /* Main part, no exclusions */
-        for (j = nj1; j < nj2; j++)
-        {
-            k = j%natoms;
-
-            /* load j atom coordinates */
-            jx                = x[3*k];
-            jy                = x[3*k+1];
-            jz                = x[3*k+2];
-
-            /* Calculate distance */
-            dx                = ix - jx;
-            dy                = iy - jy;
-            dz                = iz - jz;
-            rsq               = dx*dx+dy*dy+dz*dz;
-
-            /* Calculate 1/r and 1/r2 */
-            rinv              = gmx::invsqrt(rsq);
-            irsq              = rinv*rinv;
-            idr4              = irsq*irsq;
-            idr6              = idr4*irsq;
-
-            raj = top->atomtypes.gb_radius[mdatoms->typeA[k]];
-
-            rvdw  = rai + raj;
-
-            ratio = rsq / (rvdw * rvdw);
-            vaj   = born->vsolv[k];
-
-            if (ratio > STILL_P5INV)
-            {
-                ccf  = 1.0;
-                dccf = 0.0;
-            }
-            else
-            {
-                theta = ratio*STILL_PIP5;
-                cosq  = cos(theta);
-                term  = 0.5*(1.0-cosq);
-                ccf   = term*term;
-                sinq  = 1.0 - cosq*cosq;
-                dccf  = 2.0*term*sinq*gmx::invsqrt(sinq)*theta;
-            }
-
-            prod          = STILL_P4*vaj;
-            icf4          = ccf*idr4;
-            icf6          = (4*ccf-dccf)*idr6;
-
-            born->gpol_still_work[k] += prod_ai*icf4;
-            gpi                       = gpi+prod*icf4;
-
-            /* Save ai->aj and aj->ai chain rule terms */
-            fr->dadx[n++]   = prod*icf6;
-            fr->dadx[n++]   = prod_ai*icf6;
-        }
-        born->gpol_still_work[i] += gpi;
-    }
-
-    /* Parallel summations would go here if ever implemented with DD */
-
-    /* Calculate the radii */
-    for (i = 0; i < natoms; i++)
-    {
-        if (born->use[i] != 0)
-        {
-            gpi             = born->gpol[i]+born->gpol_still_work[i];
-            gpi2            = gpi * gpi;
-            born->bRad[i]   = factor*gmx::invsqrt(gpi2);
-            fr->invsqrta[i] = gmx::invsqrt(born->bRad[i]);
-        }
-    }
-
-    return 0;
-}
-
-
-
-int
-genborn_allvsall_calc_hct_obc_radii(t_forcerec *           fr,
-                                    t_mdatoms *            mdatoms,
-                                    gmx_genborn_t *        born,
-                                    int                    gb_algorithm,
-                                    gmx_localtop_t *       top,
-                                    real *                 x,
-                                    void *                 work)
-{
-    gmx_allvsallgb2_data_t *aadata;
-    int                     natoms;
-    int                     ni0, ni1;
-    int                     nj0, nj1, nj2;
-    int                     i, j, k, n;
-    int              *      mask;
-
-    real                    ix, iy, iz;
-    real                    jx, jy, jz;
-    real                    dx, dy, dz;
-    real                    rsq, rinv;
-    real                    prod, raj;
-    real                    rai, doffset, rai_inv, rai_inv2, sk_ai, sk2_ai, sum_ai;
-    real                    dr, sk, lij, dlij, lij2, lij3, uij2, uij3, diff2, uij, log_term;
-    real                    lij_inv, sk2, sk2_rinv, tmp, t1, t2, t3, raj_inv, sum_ai2, sum_ai3, tsum;
-    real                    tchain;
-    real                    dadxi, dadxj;
-    real                    rad, min_rad;
-
-    natoms              = mdatoms->nr;
-    ni0                 = 0;
-    ni1                 = mdatoms->homenr;
-
-    n       = 0;
-    doffset = born->gb_doffset;
-
-    aadata = *((gmx_allvsallgb2_data_t **)work);
-
-    if (aadata == nullptr)
-    {
-        genborn_allvsall_setup(&aadata, top->idef.il, mdatoms->nr,
-                               TRUE, TRUE, TRUE);
-        *((gmx_allvsallgb2_data_t **)work) = aadata;
-    }
-
-    for (i = 0; i < born->nr; i++)
-    {
-        born->gpol_hct_work[i] = 0;
-    }
-
-    for (i = ni0; i < ni1; i++)
-    {
-        /* We assume shifts are NOT used for all-vs-all interactions */
-
-        /* Load i atom data */
-        ix                = x[3*i];
-        iy                = x[3*i+1];
-        iz                = x[3*i+2];
-
-        rai      = top->atomtypes.gb_radius[mdatoms->typeA[i]]-doffset;
-        rai_inv  = 1.0/rai;
-
-        sk_ai    = born->param[i];
-        sk2_ai   = sk_ai*sk_ai;
-
-        sum_ai   = 0;
-
-        /* Load limits for loop over neighbors */
-        nj0              = aadata->jindex_gb[3*i];
-        nj1              = aadata->jindex_gb[3*i+1];
-        nj2              = aadata->jindex_gb[3*i+2];
-
-        mask             = aadata->exclusion_mask_gb[i];
-
-        /* Prologue part, including exclusion mask */
-        for (j = nj0; j < nj1; j++, mask++)
-        {
-            if (*mask != 0)
-            {
-                k = j%natoms;
-
-                /* load j atom coordinates */
-                jx                = x[3*k];
-                jy                = x[3*k+1];
-                jz                = x[3*k+2];
-
-                /* Calculate distance */
-                dx                = ix - jx;
-                dy                = iy - jy;
-                dz                = iz - jz;
-                rsq               = dx*dx+dy*dy+dz*dz;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv              = gmx::invsqrt(rsq);
-                dr                = rsq*rinv;
-
-                /* sk is precalculated in init_gb() */
-                sk    = born->param[k];
-                raj   = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset;
-
-                /* aj -> ai interaction */
-
-
-                if (rai < dr+sk)
-                {
-                    lij       = 1.0/(dr-sk);
-                    dlij      = 1.0;
-
-                    if (rai > dr-sk)
-                    {
-                        lij  = rai_inv;
-                        dlij = 0.0;
-                    }
-
-                    uij      = 1.0/(dr+sk);
-                    lij2     = lij  * lij;
-                    lij3     = lij2 * lij;
-                    uij2     = uij  * uij;
-                    uij3     = uij2 * uij;
-
-                    diff2    = uij2-lij2;
-
-                    lij_inv  = gmx::invsqrt(lij2);
-                    sk2      = sk*sk;
-                    sk2_rinv = sk2*rinv;
-                    prod     = 0.25*sk2_rinv;
-
-                    log_term = std::log(uij*lij_inv);
-                    /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
-                    tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
-
-                    if (rai < sk-dr)
-                    {
-                        tmp = tmp + 2.0 * (rai_inv-lij);
-                    }
-
-                    t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
-                    t2      = -0.5*uij2 - prod*uij3 + 0.25*(uij*rinv+uij3*dr);
-
-                    t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
-
-                    dadxi = (dlij*t1+t2+t3)*rinv;
-
-                    sum_ai += 0.5*tmp;
-                }
-                else
-                {
-                    dadxi = 0.0;
-                }
-
-                /* ai -> aj interaction */
-                if (raj < dr + sk_ai)
-                {
-                    lij     = 1.0/(dr-sk_ai);
-                    dlij    = 1.0;
-                    raj_inv = 1.0/raj;
-
-                    if (raj > dr-sk_ai)
-                    {
-                        lij  = raj_inv;
-                        dlij = 0.0;
-                    }
-
-                    lij2     = lij  * lij;
-                    lij3     = lij2 * lij;
-
-                    uij      = 1.0/(dr+sk_ai);
-                    uij2     = uij  * uij;
-                    uij3     = uij2 * uij;
-
-                    diff2    = uij2-lij2;
-
-                    lij_inv  = gmx::invsqrt(lij2);
-                    sk2      =  sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
-                    sk2_rinv = sk2*rinv;
-                    prod     = 0.25 * sk2_rinv;
-
-                    /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
-                    log_term = std::log(uij*lij_inv);
-
-                    tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
-
-                    if (raj < sk_ai-dr)
-                    {
-                        tmp     = tmp + 2.0 * (raj_inv-lij);
-                    }
-
-                    t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
-                    t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
-                    t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
-
-                    dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule	*/
-
-                    born->gpol_hct_work[k] += 0.5*tmp;
-                }
-                else
-                {
-                    dadxj = 0.0;
-                }
-                fr->dadx[n++] = dadxi;
-                fr->dadx[n++] = dadxj;
-
-            }
-        }
-
-        /* Main part, no exclusions */
-        for (j = nj1; j < nj2; j++)
-        {
-            k = j%natoms;
-
-            /* load j atom coordinates */
-            jx                = x[3*k];
-            jy                = x[3*k+1];
-            jz                = x[3*k+2];
-
-            /* Calculate distance */
-            dx                = ix - jx;
-            dy                = iy - jy;
-            dz                = iz - jz;
-            rsq               = dx*dx+dy*dy+dz*dz;
-
-            /* Calculate 1/r and 1/r2 */
-            rinv              = gmx::invsqrt(rsq);
-            dr                = rsq*rinv;
-
-            /* sk is precalculated in init_gb() */
-            sk    = born->param[k];
-            raj   = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset;
-
-            /* aj -> ai interaction */
-            if (rai < dr+sk)
-            {
-                lij       = 1.0/(dr-sk);
-                dlij      = 1.0;
-
-                if (rai > dr-sk)
-                {
-                    lij  = rai_inv;
-                    dlij = 0.0;
-                }
-
-                uij      = 1.0/(dr+sk);
-                lij2     = lij  * lij;
-                lij3     = lij2 * lij;
-                uij2     = uij  * uij;
-                uij3     = uij2 * uij;
-
-                diff2    = uij2-lij2;
-
-                lij_inv  = gmx::invsqrt(lij2);
-                sk2      = sk*sk;
-                sk2_rinv = sk2*rinv;
-                prod     = 0.25*sk2_rinv;
-
-                log_term = std::log(uij*lij_inv);
-                /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
-                tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
-
-                if (rai < sk-dr)
-                {
-                    tmp = tmp + 2.0 * (rai_inv-lij);
-                }
-
-                /* duij    = 1.0; */
-                t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
-                t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
-                t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
-
-                dadxi = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule	*/
-
-                sum_ai += 0.5*tmp;
-            }
-            else
-            {
-                dadxi = 0.0;
-            }
-
-            /* ai -> aj interaction */
-            if (raj < dr + sk_ai)
-            {
-                lij     = 1.0/(dr-sk_ai);
-                dlij    = 1.0;
-                raj_inv = 1.0/raj;
-
-                if (raj > dr-sk_ai)
-                {
-                    lij  = raj_inv;
-                    dlij = 0.0;
-                }
-
-                lij2     = lij  * lij;
-                lij3     = lij2 * lij;
-
-                uij      = 1.0/(dr+sk_ai);
-                uij2     = uij  * uij;
-                uij3     = uij2 * uij;
-
-                diff2    = uij2-lij2;
-
-                lij_inv  = gmx::invsqrt(lij2);
-                sk2      =  sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
-                sk2_rinv = sk2*rinv;
-                prod     = 0.25 * sk2_rinv;
-
-                /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
-                log_term = std::log(uij*lij_inv);
-
-                tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
-
-                if (raj < sk_ai-dr)
-                {
-                    tmp     = tmp + 2.0 * (raj_inv-lij);
-                }
-
-                t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
-                t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
-                t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
-
-                dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule	*/
-
-                born->gpol_hct_work[k] += 0.5*tmp;
-            }
-            else
-            {
-                dadxj = 0.0;
-            }
-            fr->dadx[n++] = dadxi;
-            fr->dadx[n++] = dadxj;
-        }
-        born->gpol_hct_work[i] += sum_ai;
-    }
-
-    /* Parallel summations would go here if ever implemented with DD */
-
-    if (gb_algorithm == egbHCT)
-    {
-        /* HCT */
-        for (i = 0; i < natoms; i++)
-        {
-            if (born->use[i] != 0)
-            {
-                rai     = top->atomtypes.gb_radius[mdatoms->typeA[i]]-born->gb_doffset;
-                sum_ai  = 1.0/rai - born->gpol_hct_work[i];
-                min_rad = rai + born->gb_doffset;
-                rad     = 1.0/sum_ai;
-
-                born->bRad[i]   = std::max(rad, min_rad);
-                fr->invsqrta[i] = gmx::invsqrt(born->bRad[i]);
-            }
-        }
-
-    }
-    else
-    {
-        /* OBC */
-        /* Calculate the radii */
-        for (i = 0; i < natoms; i++)
-        {
-            if (born->use[i] != 0)
-            {
-                rai        = top->atomtypes.gb_radius[mdatoms->typeA[i]];
-                rai_inv2   = 1.0/rai;
-                rai        = rai-doffset;
-                rai_inv    = 1.0/rai;
-                sum_ai     = rai * born->gpol_hct_work[i];
-                sum_ai2    = sum_ai  * sum_ai;
-                sum_ai3    = sum_ai2 * sum_ai;
-
-                tsum          = tanh(born->obc_alpha*sum_ai-born->obc_beta*sum_ai2+born->obc_gamma*sum_ai3);
-                born->bRad[i] = rai_inv - tsum*rai_inv2;
-                born->bRad[i] = 1.0 / born->bRad[i];
-
-                fr->invsqrta[i] = gmx::invsqrt(born->bRad[i]);
-
-                tchain         = rai * (born->obc_alpha-2*born->obc_beta*sum_ai+3*born->obc_gamma*sum_ai2);
-                born->drobc[i] = (1.0-tsum*tsum)*tchain*rai_inv2;
-            }
-        }
-    }
-    return 0;
-}
-
-
-
-
-
-int
-genborn_allvsall_calc_chainrule(t_forcerec *           fr,
-                                t_mdatoms *            mdatoms,
-                                gmx_genborn_t *        born,
-                                real *                 x,
-                                real *                 f,
-                                int                    gb_algorithm,
-                                void *                 work)
-{
-    gmx_allvsallgb2_data_t *aadata;
-    int                     natoms;
-    int                     ni0, ni1;
-    int                     nj0, nj1, nj2;
-    int                     i, j, k, n;
-    int                     idx;
-    int              *      mask;
-
-    real                    ix, iy, iz;
-    real                    fix, fiy, fiz;
-    real                    jx, jy, jz;
-    real                    dx, dy, dz;
-    real                    tx, ty, tz;
-    real                    rbai, rbaj, fgb, fgb_ai, rbi;
-    real              *     rb;
-    real              *     dadx;
-
-    natoms              = mdatoms->nr;
-    ni0                 = 0;
-    ni1                 = mdatoms->homenr;
-    dadx                = fr->dadx;
-
-    aadata = (gmx_allvsallgb2_data_t *)work;
-
-    n  = 0;
-    rb = born->work;
-
-    /* Loop to get the proper form for the Born radius term */
-    if (gb_algorithm == egbSTILL)
-    {
-        for (i = 0; i < natoms; i++)
-        {
-            rbi   = born->bRad[i];
-            rb[i] = (2 * rbi * rbi * fr->dvda[i])/ONE_4PI_EPS0;
-        }
-    }
-    else if (gb_algorithm == egbHCT)
-    {
-        for (i = 0; i < natoms; i++)
-        {
-            rbi   = born->bRad[i];
-            rb[i] = rbi * rbi * fr->dvda[i];
-        }
-    }
-    else if (gb_algorithm == egbOBC)
-    {
-        for (idx = 0; idx < natoms; idx++)
-        {
-            rbi     = born->bRad[idx];
-            rb[idx] = rbi * rbi * born->drobc[idx] * fr->dvda[idx];
-        }
-    }
-
-    for (i = ni0; i < ni1; i++)
-    {
-        /* We assume shifts are NOT used for all-vs-all interactions */
-
-        /* Load i atom data */
-        ix                = x[3*i];
-        iy                = x[3*i+1];
-        iz                = x[3*i+2];
-
-        fix               = 0;
-        fiy               = 0;
-        fiz               = 0;
-
-        rbai              = rb[i];
-
-        /* Load limits for loop over neighbors */
-        nj0              = aadata->jindex_gb[3*i];
-        nj1              = aadata->jindex_gb[3*i+1];
-        nj2              = aadata->jindex_gb[3*i+2];
-
-        mask             = aadata->exclusion_mask_gb[i];
-
-        /* Prologue part, including exclusion mask */
-        for (j = nj0; j < nj1; j++, mask++)
-        {
-            if (*mask != 0)
-            {
-                k = j%natoms;
-
-                /* load j atom coordinates */
-                jx                = x[3*k];
-                jy                = x[3*k+1];
-                jz                = x[3*k+2];
-
-                /* Calculate distance */
-                dx                = ix - jx;
-                dy                = iy - jy;
-                dz                = iz - jz;
-
-                rbaj              = rb[k];
-
-                fgb     = rbai*dadx[n++];
-                fgb_ai  = rbaj*dadx[n++];
-
-                /* Total force between ai and aj is the sum of ai->aj and aj->ai */
-                fgb     = fgb + fgb_ai;
-
-                tx      = fgb * dx;
-                ty      = fgb * dy;
-                tz      = fgb * dz;
-
-                fix     = fix + tx;
-                fiy     = fiy + ty;
-                fiz     = fiz + tz;
-
-                /* Update force on atom aj */
-                f[3*k]   = f[3*k] - tx;
-                f[3*k+1] = f[3*k+1] - ty;
-                f[3*k+2] = f[3*k+2] - tz;
-            }
-        }
-
-        /* Main part, no exclusions */
-        for (j = nj1; j < nj2; j++)
-        {
-            k = j%natoms;
-
-            /* load j atom coordinates */
-            jx                = x[3*k];
-            jy                = x[3*k+1];
-            jz                = x[3*k+2];
-
-            /* Calculate distance */
-            dx                = ix - jx;
-            dy                = iy - jy;
-            dz                = iz - jz;
-
-            rbaj              = rb[k];
-
-            fgb     = rbai*dadx[n++];
-            fgb_ai  = rbaj*dadx[n++];
-
-            /* Total force between ai and aj is the sum of ai->aj and aj->ai */
-            fgb     = fgb + fgb_ai;
-
-            tx      = fgb * dx;
-            ty      = fgb * dy;
-            tz      = fgb * dz;
-
-            fix     = fix + tx;
-            fiy     = fiy + ty;
-            fiz     = fiz + tz;
-
-            /* Update force on atom aj */
-            f[3*k]   = f[3*k] - tx;
-            f[3*k+1] = f[3*k+1] - ty;
-            f[3*k+2] = f[3*k+2] - tz;
-        }
-        /* Update force and shift forces on atom ai */
-        f[3*i]   = f[3*i] + fix;
-        f[3*i+1] = f[3*i+1] + fiy;
-        f[3*i+2] = f[3*i+2] + fiz;
-    }
-
-    return 0;
-}
diff --git a/src/gromacs/mdlib/genborn_allvsall.h b/src/gromacs/mdlib/genborn_allvsall.h
deleted file mode 100644
index da0f3fa7eb..0000000000
--- a/src/gromacs/mdlib/genborn_allvsall.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team.
- * Copyright (c) 2010,2014,2015, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#ifndef _GENBORN_ALLVSALL_H
-#define _GENBORN_ALLVSALL_H
-
-#include "gromacs/utility/real.h"
-
-struct gmx_genborn_t;
-struct gmx_localtop_t;
-struct t_forcerec;
-struct t_mdatoms;
-
-int
-genborn_allvsall_calc_still_radii(struct t_forcerec *           fr,
-                                  t_mdatoms        *            mdatoms,
-                                  gmx_genborn_t        *        born,
-                                  gmx_localtop_t        *       top,
-                                  real        *                 x,
-                                  void        *                 work);
-
-int
-genborn_allvsall_calc_hct_obc_radii(struct t_forcerec *           fr,
-                                    t_mdatoms        *            mdatoms,
-                                    gmx_genborn_t        *        born,
-                                    int                           gb_algorithm,
-                                    gmx_localtop_t        *       top,
-                                    real        *                 x,
-                                    void        *                 work);
-
-int
-genborn_allvsall_calc_chainrule(struct t_forcerec *           fr,
-                                t_mdatoms        *            mdatoms,
-                                gmx_genborn_t        *        born,
-                                real        *                 x,
-                                real        *                 f,
-                                int                           gb_algorithm,
-                                void        *                 work);
-
-#endif
diff --git a/src/gromacs/mdlib/mdebin.cpp b/src/gromacs/mdlib/mdebin.cpp
index 49361183ff..3858ba527d 100644
--- a/src/gromacs/mdlib/mdebin.cpp
+++ b/src/gromacs/mdlib/mdebin.cpp
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -241,18 +241,6 @@ t_mdebin *init_mdebin(ener_file_t       fp_ene,
         {
             md->bEner[i] = TRUE;
         }
-        else if ((i == F_GBPOL) && ir->implicit_solvent == eisGBSA)
-        {
-            md->bEner[i] = TRUE;
-        }
-        else if ((i == F_NPSOLVATION) && ir->implicit_solvent == eisGBSA && (ir->sa_algorithm != esaNO))
-        {
-            md->bEner[i] = TRUE;
-        }
-        else if ((i == F_GB12) || (i == F_GB13) || (i == F_GB14))
-        {
-            md->bEner[i] = FALSE;
-        }
         else if ((i == F_ETOT) || (i == F_EKIN) || (i == F_TEMP))
         {
             md->bEner[i] = EI_DYNAMICS(ir->eI);
diff --git a/src/gromacs/mdlib/minimize.cpp b/src/gromacs/mdlib/minimize.cpp
index a27575016d..e1814f97d8 100644
--- a/src/gromacs/mdlib/minimize.cpp
+++ b/src/gromacs/mdlib/minimize.cpp
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -769,7 +769,7 @@ static void evaluate_energy(FILE *fplog, t_commrec *cr,
              count, nrnb, wcycle, top, &top_global->groups,
              ems->s.box, ems->s.x, &ems->s.hist,
              ems->f, force_vir, mdAtoms->mdatoms(), enerd, fcd,
-             ems->s.lambda, graph, fr, vsite, mu_tot, t, nullptr, TRUE,
+             ems->s.lambda, graph, fr, vsite, mu_tot, t, nullptr,
              GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES |
              GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY |
              (bNS ? GMX_FORCE_NS : 0),
@@ -2832,7 +2832,6 @@ double do_nm(FILE *fplog, t_commrec *cr, const gmx::MDLogger &mdlog,
         size_t atom = atom_index[aid];
         for (size_t d = 0; d < DIM; d++)
         {
-            gmx_bool    bBornRadii  = FALSE;
             gmx_int64_t step        = 0;
             int         force_flags = GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES;
             double      t           = 0;
@@ -2861,7 +2860,7 @@ double do_nm(FILE *fplog, t_commrec *cr, const gmx::MDLogger &mdlog,
                                                constr, enerd, fcd,
                                                &state_work.s, &state_work.f, vir, mdatoms,
                                                nrnb, wcycle, graph, &top_global->groups,
-                                               shellfc, fr, bBornRadii, t, mu_tot,
+                                               shellfc, fr, t, mu_tot,
                                                vsite,
                                                DdOpenBalanceRegionBeforeForceComputation::no,
                                                DdCloseBalanceRegionAfterForceComputation::no);
diff --git a/src/gromacs/mdlib/shellfc.cpp b/src/gromacs/mdlib/shellfc.cpp
index f953a5a6aa..83b7273f52 100644
--- a/src/gromacs/mdlib/shellfc.cpp
+++ b/src/gromacs/mdlib/shellfc.cpp
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2008, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -987,7 +987,6 @@ void relax_shell_flexcon(FILE *fplog, t_commrec *cr, gmx_bool bVerbose,
                          gmx_groups_t *groups,
                          gmx_shellfc_t *shfc,
                          t_forcerec *fr,
-                         gmx_bool bBornRadii,
                          double t, rvec mu_tot,
                          gmx_vsite_t *vsite,
                          DdOpenBalanceRegionBeforeForceComputation ddOpenBalanceRegion,
@@ -1121,7 +1120,7 @@ void relax_shell_flexcon(FILE *fplog, t_commrec *cr, gmx_bool bVerbose,
              state->box, state->x, &state->hist,
              force[Min], force_vir, md, enerd, fcd,
              state->lambda, graph,
-             fr, vsite, mu_tot, t, nullptr, bBornRadii,
+             fr, vsite, mu_tot, t, nullptr,
              (bDoNS ? GMX_FORCE_NS : 0) | force_flags,
              ddOpenBalanceRegion, ddCloseBalanceRegion);
 
@@ -1224,7 +1223,7 @@ void relax_shell_flexcon(FILE *fplog, t_commrec *cr, gmx_bool bVerbose,
                  top, groups, state->box, pos[Try], &state->hist,
                  force[Try], force_vir,
                  md, enerd, fcd, state->lambda, graph,
-                 fr, vsite, mu_tot, t, nullptr, bBornRadii,
+                 fr, vsite, mu_tot, t, nullptr,
                  force_flags,
                  ddOpenBalanceRegion, ddCloseBalanceRegion);
 
diff --git a/src/gromacs/mdlib/shellfc.h b/src/gromacs/mdlib/shellfc.h
index c092a1f37a..73831831b7 100644
--- a/src/gromacs/mdlib/shellfc.h
+++ b/src/gromacs/mdlib/shellfc.h
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2008, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -81,7 +81,6 @@ void relax_shell_flexcon(FILE *log, t_commrec *cr, gmx_bool bVerbose,
                          gmx_groups_t *groups,
                          gmx_shellfc_t *shfc,
                          t_forcerec *fr,
-                         gmx_bool bBornRadii,
                          double t, rvec mu_tot,
                          gmx_vsite_t *vsite,
                          DdOpenBalanceRegionBeforeForceComputation ddOpenBalanceRegion,
diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp
index c2c40c4414..b38c3a8a3e 100644
--- a/src/gromacs/mdlib/sim_util.cpp
+++ b/src/gromacs/mdlib/sim_util.cpp
@@ -73,7 +73,6 @@
 #include "gromacs/mdlib/constr.h"
 #include "gromacs/mdlib/force.h"
 #include "gromacs/mdlib/forcerec.h"
-#include "gromacs/mdlib/genborn.h"
 #include "gromacs/mdlib/gmx_omp_nthreads.h"
 #include "gromacs/mdlib/mdrun.h"
 #include "gromacs/mdlib/nb_verlet.h"
@@ -1053,7 +1052,6 @@ static void do_force_cutsVERLET(FILE *fplog, t_commrec *cr,
                                 t_forcerec *fr, interaction_const_t *ic,
                                 gmx_vsite_t *vsite, rvec mu_tot,
                                 double t, gmx_edsam_t ed,
-                                gmx_bool bBornRadii,
                                 int flags,
                                 DdOpenBalanceRegionBeforeForceComputation ddOpenBalanceRegion,
                                 DdCloseBalanceRegionAfterForceComputation ddCloseBalanceRegion)
@@ -1538,9 +1536,8 @@ static void do_force_cutsVERLET(FILE *fplog, t_commrec *cr,
     /* Compute the bonded and non-bonded energies and optionally forces */
     do_force_lowlevel(fr, inputrec, &(top->idef),
                       cr, nrnb, wcycle, mdatoms,
-                      as_rvec_array(x.data()), hist, f, &forceWithVirial, enerd, fcd, top, fr->born,
-                      bBornRadii, box,
-                      inputrec->fepvals, lambda, graph, &(top->excls), fr->mu_tot,
+                      as_rvec_array(x.data()), hist, f, &forceWithVirial, enerd, fcd,
+                      box, inputrec->fepvals, lambda, graph, &(top->excls), fr->mu_tot,
                       flags, &cycles_pme);
 
     wallcycle_stop(wcycle, ewcFORCE);
@@ -1762,7 +1759,6 @@ static void do_force_cutsGROUP(FILE *fplog, t_commrec *cr,
                                real *lambda, t_graph *graph,
                                t_forcerec *fr, gmx_vsite_t *vsite, rvec mu_tot,
                                double t, gmx_edsam_t ed,
-                               gmx_bool bBornRadii,
                                int flags,
                                DdOpenBalanceRegionBeforeForceComputation ddOpenBalanceRegion,
                                DdCloseBalanceRegionAfterForceComputation ddCloseBalanceRegion)
@@ -1930,12 +1926,6 @@ static void do_force_cutsGROUP(FILE *fplog, t_commrec *cr,
         wallcycle_stop(wcycle, ewcNS);
     }
 
-    if (inputrec->implicit_solvent && bNS)
-    {
-        make_gb_nblist(cr, inputrec->gb_algorithm,
-                       as_rvec_array(x.data()), box, fr, &top->idef, graph, fr->born);
-    }
-
     if (DOMAINDECOMP(cr) && !thisRankHasDuty(cr, DUTY_PME))
     {
         wallcycle_start(wcycle, ewcPPDURINGPME);
@@ -1992,9 +1982,8 @@ static void do_force_cutsGROUP(FILE *fplog, t_commrec *cr,
     /* Compute the bonded and non-bonded energies and optionally forces */
     do_force_lowlevel(fr, inputrec, &(top->idef),
                       cr, nrnb, wcycle, mdatoms,
-                      as_rvec_array(x.data()), hist, f, &forceWithVirial, enerd, fcd, top, fr->born,
-                      bBornRadii, box,
-                      inputrec->fepvals, lambda,
+                      as_rvec_array(x.data()), hist, f, &forceWithVirial, enerd, fcd,
+                      box, inputrec->fepvals, lambda,
                       graph, &(top->excls), fr->mu_tot,
                       flags,
                       &cycles_pme);
@@ -2100,7 +2089,6 @@ void do_force(FILE *fplog, t_commrec *cr,
               t_forcerec *fr,
               gmx_vsite_t *vsite, rvec mu_tot,
               double t, gmx_edsam_t ed,
-              gmx_bool bBornRadii,
               int flags,
               DdOpenBalanceRegionBeforeForceComputation ddOpenBalanceRegion,
               DdCloseBalanceRegionAfterForceComputation ddCloseBalanceRegion)
@@ -2129,7 +2117,6 @@ void do_force(FILE *fplog, t_commrec *cr,
                                 fr, fr->ic,
                                 vsite, mu_tot,
                                 t, ed,
-                                bBornRadii,
                                 flags,
                                 ddOpenBalanceRegion,
                                 ddCloseBalanceRegion);
@@ -2146,7 +2133,6 @@ void do_force(FILE *fplog, t_commrec *cr,
                                lambda.data(), graph,
                                fr, vsite, mu_tot,
                                t, ed,
-                               bBornRadii,
                                flags,
                                ddOpenBalanceRegion,
                                ddCloseBalanceRegion);
diff --git a/src/gromacs/mdlib/tpi.cpp b/src/gromacs/mdlib/tpi.cpp
index 6588f1522d..b284487a1f 100644
--- a/src/gromacs/mdlib/tpi.cpp
+++ b/src/gromacs/mdlib/tpi.cpp
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -664,7 +664,7 @@ double do_tpi(FILE *fplog, t_commrec *cr, const gmx::MDLogger gmx_unused &mdlog,
                      state_global->box, state_global->x, &state_global->hist,
                      f, force_vir, mdatoms, enerd, fcd,
                      state_global->lambda,
-                     nullptr, fr, nullptr, mu_tot, t, nullptr, FALSE,
+                     nullptr, fr, nullptr, mu_tot, t, nullptr,
                      GMX_FORCE_NONBONDED | GMX_FORCE_ENERGY |
                      (bNS ? GMX_FORCE_DYNAMICBOX | GMX_FORCE_NS : 0) |
                      (bStateChanged ? GMX_FORCE_STATECHANGED : 0),
diff --git a/src/gromacs/mdtypes/forcerec.h b/src/gromacs/mdtypes/forcerec.h
index ce3b8b341f..e4e41decf9 100644
--- a/src/gromacs/mdtypes/forcerec.h
+++ b/src/gromacs/mdtypes/forcerec.h
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -50,7 +50,6 @@
 struct ForceProviders;
 
 /* Abstract type for PME that is defined only in the routine that use them. */
-struct gmx_genborn_t;
 struct gmx_ns_t;
 struct gmx_pme_t;
 struct nonbonded_verlet_t;
@@ -110,14 +109,10 @@ extern "C" {
 enum {
     enbvdwNONE, enbvdwLJ, enbvdwBHAM, enbvdwTAB, enbvdwNR
 };
-/* OOR is "one over r" -- standard coul */
-enum {
-    enbcoulNONE, enbcoulOOR, enbcoulRF, enbcoulTAB, enbcoulGB, enbcoulFEWALD, enbcoulNR
-};
 
 enum {
     egCOULSR, egLJSR, egBHAMSR,
-    egCOUL14, egLJ14, egGB, egNR
+    egCOUL14, egLJ14, egNR
 };
 extern const char *egrp_nm[egNR+1];
 
@@ -192,7 +187,6 @@ struct t_forcerec {
     gmx_bool bAllvsAll;
     /* Private work data */
     void    *AllvsAll_work;
-    void    *AllvsAll_workgb;
 
     /* Cut-Off stuff.
      * Infinite cut-off's will be GMX_CUTOFF_INF (unlike in t_inputrec: 0).
@@ -320,46 +314,6 @@ struct t_forcerec {
     /* Shell molecular dynamics flexible constraints */
     real fc_stepsize;
 
-    /* Generalized born implicit solvent */
-    gmx_bool              bGB;
-    /* Generalized born stuff */
-    real                  gb_epsilon_solvent;
-    /* Table data for GB */
-    struct t_forcetable  *gbtab;
-    /* VdW radius for each atomtype (dim is thus ntype) */
-    real                 *atype_radius;
-    /* Effective radius (derived from effective volume) for each type */
-    real                 *atype_vol;
-    /* Implicit solvent - surface tension for each atomtype */
-    real                 *atype_surftens;
-    /* Implicit solvent - radius for GB calculation */
-    real                 *atype_gb_radius;
-    /* Implicit solvent - overlap for HCT model */
-    real                 *atype_S_hct;
-    /* Generalized born interaction data */
-    struct gmx_genborn_t *born;
-
-    /* Table scale for GB */
-    real gbtabscale;
-    /* Table range for GB */
-    real gbtabr;
-    /* GB neighborlists (the sr list will contain for each atom all other atoms
-     * (for use in the SA calculation) and the lr list will contain
-     * for each atom all atoms 1-4 or greater (for use in the GB calculation)
-     */
-    struct t_nblist *gblist_sr;
-    struct t_nblist *gblist_lr;
-    struct t_nblist *gblist;
-
-    /* Inverse square root of the Born radii for implicit solvent */
-    real *invsqrta;
-    /* Derivatives of the potential with respect to the Born radii */
-    real *dvda;
-    /* Derivatives of the Born radii with respect to coordinates */
-    real *dadx;
-    real *dadx_rawptr;
-    int   nalloc_dadx; /* Allocated size of dadx */
-
     /* If > 0 signals Test Particle Insertion,
      * the value is the number of atoms of the molecule to insert
      * Only the energy difference due to the addition of the last molecule
diff --git a/src/gromacs/mdtypes/inputrec.cpp b/src/gromacs/mdtypes/inputrec.cpp
index 665e6563c4..e42cfe2385 100644
--- a/src/gromacs/mdtypes/inputrec.cpp
+++ b/src/gromacs/mdtypes/inputrec.cpp
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2010, The GROMACS development team.
- * Copyright (c) 2012,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -927,22 +927,6 @@ void pr_inputrec(FILE *fp, int indent, const char *title, const t_inputrec *ir,
         PR("ewald-geometry", ir->ewald_geometry);
         PR("epsilon-surface", ir->epsilon_surface);
 
-        /* Implicit solvent */
-        PS("implicit-solvent", EIMPLICITSOL(ir->implicit_solvent));
-
-        /* Generalized born electrostatics */
-        PS("gb-algorithm", EGBALGORITHM(ir->gb_algorithm));
-        PI("nstgbradii", ir->nstgbradii);
-        PR("rgbradii", ir->rgbradii);
-        PR("gb-epsilon-solvent", ir->gb_epsilon_solvent);
-        PR("gb-saltconc", ir->gb_saltconc);
-        PR("gb-obc-alpha", ir->gb_obc_alpha);
-        PR("gb-obc-beta", ir->gb_obc_beta);
-        PR("gb-obc-gamma", ir->gb_obc_gamma);
-        PR("gb-dielectric-offset", ir->gb_dielectric_offset);
-        PS("sa-algorithm", ESAALGORITHM(ir->sa_algorithm));
-        PR("sa-surface-tension", ir->sa_surface_tension);
-
         /* Options for weak coupling algorithms */
         PS("tcoupl", ETCOUPLTYPE(ir->etc));
         PI("nsttcouple", ir->nsttcouple);
@@ -1343,18 +1327,6 @@ void cmp_inputrec(FILE *fp, const t_inputrec *ir1, const t_inputrec *ir2, real f
     cmp_real(fp, "inputrec->epsilon_r", -1, ir1->epsilon_r, ir2->epsilon_r, ftol, abstol);
     cmp_real(fp, "inputrec->epsilon_rf", -1, ir1->epsilon_rf, ir2->epsilon_rf, ftol, abstol);
     cmp_real(fp, "inputrec->tabext", -1, ir1->tabext, ir2->tabext, ftol, abstol);
-    cmp_int(fp, "inputrec->implicit_solvent", -1, ir1->implicit_solvent, ir2->implicit_solvent);
-    cmp_int(fp, "inputrec->gb_algorithm", -1, ir1->gb_algorithm, ir2->gb_algorithm);
-    cmp_int(fp, "inputrec->nstgbradii", -1, ir1->nstgbradii, ir2->nstgbradii);
-    cmp_real(fp, "inputrec->rgbradii", -1, ir1->rgbradii, ir2->rgbradii, ftol, abstol);
-    cmp_real(fp, "inputrec->gb_saltconc", -1, ir1->gb_saltconc, ir2->gb_saltconc, ftol, abstol);
-    cmp_real(fp, "inputrec->gb_epsilon_solvent", -1, ir1->gb_epsilon_solvent, ir2->gb_epsilon_solvent, ftol, abstol);
-    cmp_real(fp, "inputrec->gb_obc_alpha", -1, ir1->gb_obc_alpha, ir2->gb_obc_alpha, ftol, abstol);
-    cmp_real(fp, "inputrec->gb_obc_beta", -1, ir1->gb_obc_beta, ir2->gb_obc_beta, ftol, abstol);
-    cmp_real(fp, "inputrec->gb_obc_gamma", -1, ir1->gb_obc_gamma, ir2->gb_obc_gamma, ftol, abstol);
-    cmp_real(fp, "inputrec->gb_dielectric_offset", -1, ir1->gb_dielectric_offset, ir2->gb_dielectric_offset, ftol, abstol);
-    cmp_int(fp, "inputrec->sa_algorithm", -1, ir1->sa_algorithm, ir2->sa_algorithm);
-    cmp_real(fp, "inputrec->sa_surface_tension", -1, ir1->sa_surface_tension, ir2->sa_surface_tension, ftol, abstol);
 
     cmp_int(fp, "inputrec->eDispCorr", -1, ir1->eDispCorr, ir2->eDispCorr);
     cmp_real(fp, "inputrec->shake_tol", -1, ir1->shake_tol, ir2->shake_tol, ftol, abstol);
@@ -1466,8 +1438,7 @@ gmx_bool inputrecNeedMutot(const t_inputrec *ir)
 
 gmx_bool inputrecExclForces(const t_inputrec *ir)
 {
-    return (EEL_FULL(ir->coulombtype) || (EEL_RF(ir->coulombtype)) ||
-            ir->implicit_solvent != eisNO);
+    return (EEL_FULL(ir->coulombtype) || (EEL_RF(ir->coulombtype)));
 }
 
 gmx_bool inputrecNptTrotter(const t_inputrec *ir)
diff --git a/src/gromacs/mdtypes/inputrec.h b/src/gromacs/mdtypes/inputrec.h
index 4094bc02fd..87a715980e 100644
--- a/src/gromacs/mdtypes/inputrec.h
+++ b/src/gromacs/mdtypes/inputrec.h
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -297,18 +297,7 @@ struct t_inputrec
     real            rcoulomb;                /* Coulomb cutoff (nm)		                */
     real            epsilon_r;               /* relative dielectric constant                 */
     real            epsilon_rf;              /* relative dielectric constant of the RF       */
-    int             implicit_solvent;        /* No (=explicit water), or GBSA solvent models */
-    int             gb_algorithm;            /* Algorithm to use for calculation Born radii  */
-    int             nstgbradii;              /* Frequency of updating Generalized Born radii */
-    real            rgbradii;                /* Cutoff for GB radii calculation              */
-    real            gb_saltconc;             /* Salt concentration (M) for GBSA models       */
-    real            gb_epsilon_solvent;      /* dielectric coeff. of implicit solvent     */
-    real            gb_obc_alpha;            /* 1st scaling factor for Bashford-Case GB      */
-    real            gb_obc_beta;             /* 2nd scaling factor for Bashford-Case GB      */
-    real            gb_obc_gamma;            /* 3rd scaling factor for Bashford-Case GB      */
-    real            gb_dielectric_offset;    /* Dielectric offset for Still/HCT/OBC     */
-    int             sa_algorithm;            /* Algorithm for SA part of GBSA                */
-    real            sa_surface_tension;      /* Energy factor for SA part of GBSA */
+    bool            implicit_solvent;        /* Always false (no longer supported)           */
     int             vdwtype;                 /* Type of Van der Waals treatment              */
     int             vdw_modifier;            /* Modify the VdW interaction                   */
     real            rvdw_switch;             /* Van der Waals switch range start (nm)        */
diff --git a/src/gromacs/mdtypes/md_enums.cpp b/src/gromacs/mdtypes/md_enums.cpp
index e47f1e0a0f..de4e0c362d 100644
--- a/src/gromacs/mdtypes/md_enums.cpp
+++ b/src/gromacs/mdtypes/md_enums.cpp
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -79,7 +79,7 @@ const char *erefscaling_names[erscNR+1] = {
 const char *eel_names[eelNR+1] = {
     "Cut-off", "Reaction-Field", "Generalized-Reaction-Field",
     "PME", "Ewald", "P3M-AD", "Poisson", "Switch", "Shift", "User",
-    "Generalized-Born", "Reaction-Field-nec", "Encad-shift",
+    "Generalized-Born (unused)", "Reaction-Field-nec", "Encad-shift",
     "PME-User", "PME-Switch", "PME-User-Switch",
     "Reaction-Field-zero", nullptr
 };
@@ -189,18 +189,6 @@ const char *eann_names[eannNR+1] = {
     "No", "Single", "Periodic", nullptr
 };
 
-const char *eis_names[eisNR+1] = {
-    "No", "GBSA", nullptr
-};
-
-const char *egb_names[egbNR+1] = {
-    "Still", "HCT", "OBC", nullptr
-};
-
-const char *esa_names[esaNR+1] = {
-    "Ace-approximation", "None", "Still", nullptr
-};
-
 const char *ewt_names[ewtNR+1] = {
     "9-3", "10-4", "table", "12-6", nullptr
 };
@@ -257,7 +245,7 @@ const char *gmx_nblist_interaction_names[GMX_NBLIST_INTERACTION_NR+1] = {
 
 const char *gmx_nbkernel_elec_names[GMX_NBKERNEL_ELEC_NR+1] =
 {
-    "None", "Coulomb", "Reaction-Field", "Cubic-Spline-Table", "Generalized-Born", "Ewald", nullptr
+    "None", "Coulomb", "Reaction-Field", "Cubic-Spline-Table", "Ewald", nullptr
 };
 
 const char *gmx_nbkernel_vdw_names[GMX_NBKERNEL_VDW_NR+1] =
diff --git a/src/gromacs/mdtypes/md_enums.h b/src/gromacs/mdtypes/md_enums.h
index a0c1565499..af55870361 100644
--- a/src/gromacs/mdtypes/md_enums.h
+++ b/src/gromacs/mdtypes/md_enums.h
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -159,12 +159,7 @@ extern const char *eintmod_names[eintmodNR+1];
 //! Macro to select the correct string for modifiers
 #define INTMODIFIER(e) enum_name(e, eintmodNR, eintmod_names)
 
-/*! \brief Cut-off treatment for Coulomb
- *
- * eelNOTUSED1 used to be GB, but to enable generalized born with different
- * forms of electrostatics (RF, switch, etc.) in the future it is now selected
- * separately (through the implicit_solvent option).
- */
+/*! \brief Cut-off treatment for Coulomb */
 enum {
     eelCUT,     eelRF,     eelGRF,   eelPME,  eelEWALD,  eelP3M_AD,
     eelPOISSON, eelSWITCH, eelSHIFT, eelUSER, eelGB_NOTUSED, eelRF_NEC_UNSUPPORTED, eelENCADSHIFT,
@@ -469,33 +464,6 @@ extern const char *eann_names[eannNR+1];
 //! And macro for simulated annealing string
 #define EANNEAL(e)      enum_name(e, eannNR, eann_names)
 
-//! Implicit solvent algorithms.
-enum {
-    eisNO, eisGBSA, eisNR
-};
-//! String corresponding to implicit solvent.
-extern const char *eis_names[eisNR+1];
-//! Macro for implicit solvent string.
-#define EIMPLICITSOL(e) enum_name(e, eisNR, eis_names)
-
-//! Algorithms for calculating GB radii.
-enum {
-    egbSTILL, egbHCT, egbOBC, egbNR
-};
-//! String for GB algorithm name.
-extern const char *egb_names[egbNR+1];
-//! Macro for GB string.
-#define EGBALGORITHM(e) enum_name(e, egbNR, egb_names)
-
-//! Surface area algorithm for implicit solvent.
-enum {
-    esaAPPROX, esaNO, esaSTILL, esaNR
-};
-//! String corresponding to surface area algorithm.
-extern const char *esa_names[esaNR+1];
-//! brief Macro for SA algorithm string.
-#define ESAALGORITHM(e) enum_name(e, esaNR, esa_names)
-
 //! Wall types.
 enum {
     ewt93, ewt104, ewtTABLE, ewt126, ewtNR
@@ -636,7 +604,6 @@ enum gmx_nbkernel_elec
     GMX_NBKERNEL_ELEC_COULOMB,
     GMX_NBKERNEL_ELEC_REACTIONFIELD,
     GMX_NBKERNEL_ELEC_CUBICSPLINETABLE,
-    GMX_NBKERNEL_ELEC_GENERALIZEDBORN,
     GMX_NBKERNEL_ELEC_EWALD,
     GMX_NBKERNEL_ELEC_NR
 };
diff --git a/src/gromacs/tables/forcetable.cpp b/src/gromacs/tables/forcetable.cpp
index cfd20340b9..6b2a87692f 100644
--- a/src/gromacs/tables/forcetable.cpp
+++ b/src/gromacs/tables/forcetable.cpp
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -1446,74 +1446,6 @@ t_forcetable *make_tables(FILE *out,
     return table;
 }
 
-t_forcetable *make_gb_table(const t_forcerec              *fr)
-{
-    t_tabledata    *td;
-    int             nx0;
-    double          r, r2, Vtab, Ftab, expterm;
-
-    t_forcetable   *table;
-
-    /* Set the table dimensions for GB, not really necessary to
-     * use etiNR (since we only have one table, but ...)
-     */
-    snew(table, 1);
-    snew(td, 1);
-    table->interaction   = GMX_TABLE_INTERACTION_ELEC;
-    table->format        = GMX_TABLE_FORMAT_CUBICSPLINE_YFGH;
-    table->r             = fr->gbtabr;
-    table->scale         = fr->gbtabscale;
-    table->n             = static_cast<int>(table->scale*table->r);
-    table->formatsize    = 4;
-    table->ninteractions = 1;
-    table->stride        = table->formatsize*table->ninteractions;
-    nx0                  = 0;
-
-    /* Each table type (e.g. coul,lj6,lj12) requires four numbers per
-     * datapoint. For performance reasons we want the table data to be
-     * aligned on a 32-byte boundary. This new pointer must not be
-     * used in a free() call, but thankfully we're sloppy enough not
-     * to do this :-)
-     */
-
-    snew_aligned(table->data, table->stride*table->n, 32);
-
-    init_table(table->n, nx0, table->scale, &(td[0]), TRUE);
-
-    /* Local implementation so we don't have to use the etabGB
-     * enum above, which will cause problems later when
-     * making the other tables (right now even though we are using
-     * GB, the normal Coulomb tables will be created, but this
-     * will cause a problem since fr->eeltype==etabGB which will not
-     * be defined in fill_table and set_table_type
-     */
-
-    for (int i = nx0; i < table->n; i++)
-    {
-        r       = td->x[i];
-        r2      = r*r;
-        expterm = exp(-0.25*r2);
-
-        Vtab = 1/sqrt(r2+expterm);
-        Ftab = (r-0.25*r*expterm)/((r2+expterm)*sqrt(r2+expterm));
-
-        /* Convert to single precision when we store to mem */
-        td->x[i]  = i/table->scale;
-        td->v[i]  = Vtab;
-        td->f[i]  = Ftab;
-
-    }
-
-    copy2table(table->n, 0, table->stride, td[0].x, td[0].v, td[0].f, 1.0, table->data);
-
-    done_tabledata(&(td[0]));
-    sfree(td);
-
-    return table;
-
-
-}
-
 bondedtable_t make_bonded_table(FILE *fplog, const char *fn, int angle)
 {
     t_tabledata   td;
diff --git a/src/gromacs/tables/forcetable.h b/src/gromacs/tables/forcetable.h
index 7536f48d1b..73f765a3d6 100644
--- a/src/gromacs/tables/forcetable.h
+++ b/src/gromacs/tables/forcetable.h
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -143,13 +143,6 @@ t_forcetable *make_tables(FILE *fp,
  */
 bondedtable_t make_bonded_table(FILE *fplog, const char *fn, int angle);
 
-/*! \brief Return a table for GB calculations
- *
- * \param fr   Force record
- * \return     Pointer to new gb table structure
- */
-t_forcetable *make_gb_table(const t_forcerec              *fr);
-
 /*! \brief Construct and return tabulated dispersion and repulsion interactions
  *
  * This table can be used to compute long-range dispersion corrections */
diff --git a/src/gromacs/timing/wallcycle.cpp b/src/gromacs/timing/wallcycle.cpp
index 6195dadd27..86f5a7d8bc 100644
--- a/src/gromacs/timing/wallcycle.cpp
+++ b/src/gromacs/timing/wallcycle.cpp
@@ -104,7 +104,7 @@ static const char *wcn[ewcNR] =
 {
     "Run", "Step", "PP during PME", "Domain decomp.", "DD comm. load",
     "DD comm. bounds", "Vsite constr.", "Send X to PME", "Neighbor search", "Launch GPU ops.",
-    "Comm. coord.", "Born radii", "Force", "Wait + Comm. F", "PME mesh",
+    "Comm. coord.", "Force", "Wait + Comm. F", "PME mesh",
     "PME redist. X/F", "PME spread", "PME gather", "PME 3D-FFT", "PME 3D-FFT Comm.", "PME solve LJ", "PME solve Elec",
     "PME wait for PP", "Wait + Recv. PME F",
     "Wait PME GPU spread", "PME 3D-FFT", "PME solve", /* the strings for FFT/solve are repeated here for mixed mode counters */
diff --git a/src/gromacs/timing/wallcycle.h b/src/gromacs/timing/wallcycle.h
index 9698467f7e..6896f87a95 100644
--- a/src/gromacs/timing/wallcycle.h
+++ b/src/gromacs/timing/wallcycle.h
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2008, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -50,7 +50,7 @@ struct t_commrec;
 enum {
     ewcRUN, ewcSTEP, ewcPPDURINGPME, ewcDOMDEC, ewcDDCOMMLOAD,
     ewcDDCOMMBOUND, ewcVSITECONSTR, ewcPP_PMESENDX, ewcNS, ewcLAUNCH_GPU,
-    ewcMOVEX, ewcGB, ewcFORCE, ewcMOVEF, ewcPMEMESH,
+    ewcMOVEX, ewcFORCE, ewcMOVEF, ewcPMEMESH,
     ewcPME_REDISTXF, ewcPME_SPREAD, ewcPME_GATHER, ewcPME_FFT, ewcPME_FFTCOMM, ewcLJPME, ewcPME_SOLVE,
     ewcPMEWAITCOMM, ewcPP_PMEWAITRECVF,
     ewcWAIT_GPU_PME_SPREAD, ewcPME_FFT_MIXED_MODE, ewcPME_SOLVE_MIXED_MODE,
diff --git a/src/gromacs/topology/atoms.cpp b/src/gromacs/topology/atoms.cpp
index 0fa17bcd36..61449e4859 100644
--- a/src/gromacs/topology/atoms.cpp
+++ b/src/gromacs/topology/atoms.cpp
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -74,11 +74,7 @@ void init_atom(t_atoms *at)
 void init_atomtypes(t_atomtypes *at)
 {
     at->nr         = 0;
-    at->radius     = nullptr;
-    at->vol        = nullptr;
     at->atomnumber = nullptr;
-    at->gb_radius  = nullptr;
-    at->S_hct      = nullptr;
 }
 
 void done_atom(t_atoms *at)
@@ -95,12 +91,7 @@ void done_atom(t_atoms *at)
 void done_atomtypes(t_atomtypes *atype)
 {
     atype->nr = 0;
-    sfree(atype->radius);
-    sfree(atype->vol);
-    sfree(atype->surftens);
     sfree(atype->atomnumber);
-    sfree(atype->gb_radius);
-    sfree(atype->S_hct);
 }
 
 void add_t_atoms(t_atoms *atoms, int natom_extra, int nres_extra)
@@ -332,10 +323,8 @@ void pr_atomtypes(FILE *fp, int indent, const char *title, const t_atomtypes *at
         {
             pr_indent(fp, indent);
             fprintf(fp,
-                    "atomtype[%3d]={radius=%12.5e, volume=%12.5e, gb_radius=%12.5e, surftens=%12.5e, atomnumber=%4d, S_hct=%12.5e)}\n",
-                    bShowNumbers ? i : -1, atomtypes->radius[i], atomtypes->vol[i],
-                    atomtypes->gb_radius[i],
-                    atomtypes->surftens[i], atomtypes->atomnumber[i], atomtypes->S_hct[i]);
+                    "atomtype[%3d]={atomnumber=%4d}\n",
+                    bShowNumbers ? i : -1, atomtypes->atomnumber[i]);
         }
     }
 }
diff --git a/src/gromacs/topology/atoms.h b/src/gromacs/topology/atoms.h
index 8bb674b67d..59bd270a9f 100644
--- a/src/gromacs/topology/atoms.h
+++ b/src/gromacs/topology/atoms.h
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2012,2014,2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2012,2014,2015,2016,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -131,11 +131,6 @@ typedef struct t_atoms
 typedef struct t_atomtypes
 {
     int           nr;           /* number of atomtypes                          */
-    real         *radius;       /* GBSA radius for each atomtype                */
-    real         *vol;          /* GBSA efective volume for each atomtype       */
-    real         *surftens;     /* implicit solvent surftens for each atomtype  */
-    real         *gb_radius;    /* GB radius for each atom type                 */
-    real         *S_hct;        /* Overlap factors for HCT/OBC GB models        */
     int          *atomnumber;   /* Atomic number, used for QM/MM                */
 } t_atomtypes;
 
diff --git a/src/gromacs/topology/idef.cpp b/src/gromacs/topology/idef.cpp
index 4806c2a9ba..652a36bb5d 100644
--- a/src/gromacs/topology/idef.cpp
+++ b/src/gromacs/topology/idef.cpp
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -273,10 +273,14 @@ void pr_iparams(FILE *fp, t_functype ftype, const t_iparams *iparams)
         case F_VSITEN:
             fprintf(fp, "n=%2d, a=%15.8e\n", iparams->vsiten.n, iparams->vsiten.a);
             break;
-        case F_GB12:
-        case F_GB13:
-        case F_GB14:
-            fprintf(fp, "sar=%15.8e, st=%15.8e, pi=%15.8e, gbr=%15.8e, bmlt=%15.8e\n", iparams->gb.sar, iparams->gb.st, iparams->gb.pi, iparams->gb.gbr, iparams->gb.bmlt);
+        case F_GB12_NOLONGERUSED:
+        case F_GB13_NOLONGERUSED:
+        case F_GB14_NOLONGERUSED:
+            // These could only be generated by grompp, not written in
+            // a .top file. Now that implicit solvent is not
+            // supported, they can't be generated, and the values are
+            // ignored if read from an old .tpr file. So there is
+            // nothing to print.
             break;
         case F_CMAP:
             fprintf(fp, "cmapA=%1d, cmapB=%1d\n", iparams->cmap.cmapA, iparams->cmap.cmapB);
diff --git a/src/gromacs/topology/idef.h b/src/gromacs/topology/idef.h
index 467d7d45f8..822bbf25fb 100644
--- a/src/gromacs/topology/idef.h
+++ b/src/gromacs/topology/idef.h
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -87,11 +87,11 @@ enum {
     F_PIDIHS,
     F_TABDIHS,
     F_CMAP,
-    F_GB12,
-    F_GB13,
-    F_GB14,
-    F_GBPOL,
-    F_NPSOLVATION,
+    F_GB12_NOLONGERUSED,
+    F_GB13_NOLONGERUSED,
+    F_GB14_NOLONGERUSED,
+    F_GBPOL_NOLONGERUSED,
+    F_NPSOLVATION_NOLONGERUSED,
     F_LJ14,
     F_COUL14,
     F_LJC14_Q,
@@ -268,9 +268,6 @@ typedef union t_iparams
     struct {
         int  table; real kA; real kB;
     } tab;
-    struct {
-        real sar, st, pi, gbr, bmlt;
-    } gb;
     struct {
         int cmapA, cmapB;
     } cmap;
diff --git a/src/gromacs/topology/ifunc.cpp b/src/gromacs/topology/ifunc.cpp
index 376e46402c..f042b5ceaf 100644
--- a/src/gromacs/topology/ifunc.cpp
+++ b/src/gromacs/topology/ifunc.cpp
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -113,11 +113,11 @@ const t_interaction_function interaction_function[F_NRE] =
     def_bonded  ("PIDIHS",   "Improper Dih.",   4, 3, 3,  eNR_IMPROPER, pdihs       ),
     def_bondedt ("TABDIHS", "Tab. Dih.",        4, 2, 2,  eNR_TABDIHS, tab_dihs     ),
     def_bonded  ("CMAP",  "CMAP Dih.",          5, -1, -1,  eNR_CMAP,   unimplemented ),
-    def_bonded  ("GB12",     "GB 1-2 Pol.",     2, 4, 0,  eNR_GB,     unimplemented ),
-    def_bonded  ("GB13",     "GB 1-3 Pol.",     2, 4, 0,  eNR_GB,     unimplemented ),
-    def_bonded  ("GB14",     "GB 1-4 Pol.",     2, 4, 0,  eNR_GB,     unimplemented ),
-    def_nofc    ("GBPOL",    "GB Polarization" ),
-    def_nofc    ("NPSOLVATION", "Nonpolar Sol." ),
+    def_nofc    ("GB12",     "GB 1-2 Pol. (unused)" ),
+    def_nofc    ("GB13",     "GB 1-3 Pol. (unused)" ),
+    def_nofc    ("GB14",     "GB 1-4 Pol. (unused)" ),
+    def_nofc    ("GBPOL",    "GB Polarization (unused)" ),
+    def_nofc    ("NPSOLVATION", "Nonpolar Sol. (unused)" ),
     def_bondedz ("LJ14",     "LJ-14",           2, 2, 2,  eNR_NB14,   unimplemented ),
     def_nofc    ("COUL14",   "Coulomb-14"                                           ),
     def_bondedz ("LJC14_Q",  "LJC-14 q",        2, 5, 0,  eNR_NB14,   unimplemented ),
diff --git a/src/programs/mdrun/md.cpp b/src/programs/mdrun/md.cpp
index 23462bebfc..dae46eb389 100644
--- a/src/programs/mdrun/md.cpp
+++ b/src/programs/mdrun/md.cpp
@@ -321,7 +321,7 @@ double gmx::do_md(FILE *fplog, t_commrec *cr, const gmx::MDLogger &mdlog,
     gmx_bool        bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner;
     gmx_bool        bNS, bNStList, bSimAnn, bStopCM,
                     bFirstStep, bInitStep, bLastStep = FALSE,
-                    bBornRadii, bUsingEnsembleRestraints;
+                    bUsingEnsembleRestraints;
     gmx_bool          bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE;
     gmx_bool          do_ene, do_log, do_verbose, bRerunWarnNoV = TRUE,
                       bForceUpdate = FALSE, bCPT;
@@ -1060,13 +1060,6 @@ double gmx::do_md(FILE *fplog, t_commrec *cr, const gmx::MDLogger &mdlog,
             bLastStep = TRUE;
         }
 
-        /* Determine whether or not to update the Born radii if doing GB */
-        bBornRadii = bFirstStep;
-        if (ir->implicit_solvent && (step % ir->nstgbradii == 0))
-        {
-            bBornRadii = TRUE;
-        }
-
         /* do_log triggers energy and virial calculation. Because this leads
          * to different code paths, forces can be different. Thus for exact
          * continuation we should avoid extra log output.
@@ -1207,7 +1200,7 @@ double gmx::do_md(FILE *fplog, t_commrec *cr, const gmx::MDLogger &mdlog,
                                 constr, enerd, fcd,
                                 state, &f, force_vir, mdatoms,
                                 nrnb, wcycle, graph, groups,
-                                shellfc, fr, bBornRadii, t, mu_tot,
+                                shellfc, fr, t, mu_tot,
                                 vsite,
                                 ddOpenBalanceRegion, ddCloseBalanceRegion);
         }
@@ -1235,7 +1228,7 @@ double gmx::do_md(FILE *fplog, t_commrec *cr, const gmx::MDLogger &mdlog,
                      state->box, state->x, &state->hist,
                      f, force_vir, mdatoms, enerd, fcd,
                      state->lambda, graph,
-                     fr, vsite, mu_tot, t, ed, bBornRadii,
+                     fr, vsite, mu_tot, t, ed,
                      (bNS ? GMX_FORCE_NS : 0) | force_flags,
                      ddOpenBalanceRegion, ddCloseBalanceRegion);
         }
-- 
2.22.0